Coverage for python/lsst/ap/verify/workspace.py : 43%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# This file is part of ap_verify.
3#
4# Developed for the LSST Data Management System.
5# This product includes software developed by the LSST Project
6# (http://www.lsst.org).
7# See the COPYRIGHT file at the top-level directory of this distribution
8# for details of code ownership.
9#
10# This program is free software: you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation, either version 3 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program. If not, see <http://www.gnu.org/licenses/>.
22#
24__all__ = ["Workspace", "WorkspaceGen2", "WorkspaceGen3"]
26import abc
27import os
28import pathlib
29import re
30import stat
32import lsst.skymap
33import lsst.daf.persistence as dafPersist
34import lsst.daf.butler as dafButler
35import lsst.obs.base as obsBase
class Workspace(metaclass=abc.ABCMeta):
    """A directory used by ``ap_verify`` to handle data and outputs.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for various purposes. Subclasses are
    typically specialized for particular workflows. Keeping such details in
    separate classes makes it easier to provide guarantees without forcing
    awkward directory structures on users.

    All Workspace classes must guarantee the existence of any subdirectories
    inside the workspace. Directories corresponding to repositories do not need
    to be initialized, since creating a valid repository usually requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """
    def __init__(self, location):
        # Properties must be `str` for backwards compatibility
        self._location = str(pathlib.Path(location).resolve())

        self.mkdir(self._location)
        self.mkdir(self.configDir)

    @staticmethod
    def mkdir(directory):
        """Create a directory for the workspace.

        This method is intended to be called only by subclasses, and should
        not be used by external code.

        Parameters
        ----------
        directory : `str`
            The directory to create.
        """
        mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH  # a+r, u+rwx
        # exist_ok means repeated calls (and pre-existing workspaces) are safe.
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True, mode=mode)

    def __eq__(self, other):
        """Test whether two workspaces are of the same type and have the
        same location.
        """
        # `is` is the idiomatic exact-type comparison; a subclass is never
        # equal to its base even if they share a location.
        return type(self) is type(other) and self.workDir == other.workDir

    def __repr__(self):
        """A string representation that can be used to reconstruct the Workspace.
        """
        return f"{type(self).__name__}({self.workDir!r})"

    @property
    def workDir(self):
        """The absolute location of the workspace as a whole
        (`str`, read-only).
        """
        return self._location

    @property
    def configDir(self):
        """The absolute location of a directory containing custom Task config
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'config')

    @property
    @abc.abstractmethod
    def dbLocation(self):
        """The default absolute location of the source association database to
        be created or updated by the pipeline (`str`, read-only).

        Shall be a pathname to a database suitable for the backend of `Apdb`.
        """

    @property
    @abc.abstractmethod
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """

    @property
    @abc.abstractmethod
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs (read-only).
        The type is class-dependent.
        """

    @property
    @abc.abstractmethod
    def analysisButler(self):
        """A Butler that can read pipeline outputs (read-only).
        The type is class-dependent.

        The Butler should be read-only, if its type supports the restriction.
        """
class WorkspaceGen2(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for repositories. Constructing a
    WorkspaceGen2 does not *initialize* its repositories, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        # Guarantee that every repository directory exists, even if empty.
        # templateRepo aliases dataRepo, and mkdir tolerates repeats.
        for repoDir in (self.dataRepo, self.calibRepo, self.templateRepo, self.outputRepo):
            self.mkdir(repoDir)

        # Butlers are created on first access; construction is expensive.
        self._workButler = None
        self._analysisButler = None

    @property
    def dataRepo(self):
        """The absolute path/URI to a Butler repo for science data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'ingested')

    @property
    def calibRepo(self):
        """The absolute path/URI to a Butler repo for calibration data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'calibingested')

    @property
    def templateRepo(self):
        """The absolute path/URI to a Butler repo for precomputed templates
        (`str`, read-only).
        """
        # Templates live alongside the science data in this workspace layout.
        return self.dataRepo

    @property
    def outputRepo(self):
        """The absolute path/URI to a Butler repo for AP pipeline products
        (`str`, read-only).
        """
        return os.path.join(self._location, 'output')

    @property
    def dbLocation(self):
        # Docstring inherited from Workspace.dbLocation.
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        # Docstring inherited from Workspace.alertLocation.
        return os.path.join(self._location, 'alerts')

    @property
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs
        (`lsst.daf.persistence.Butler`, read-only).
        """
        if self._workButler is None:
            self._workButler = self._makeButler()
        return self._workButler

    def _makeButler(self):
        """Create a butler for accessing the entire workspace.

        Returns
        -------
        butler : `lsst.daf.persistence.Butler`
            A butler accepting `dataRepo`, `calibRepo`, and `templateRepo` as
            inputs, and `outputRepo` as an output.

        Notes
        -----
        Assumes all `*Repo` properties have been initialized.
        """
        # Every repository element shares the same mapper configuration.
        mapperArgs = {"calibRoot": os.path.abspath(self.calibRepo)}

        butlerInputs = [{"root": self.dataRepo, "mapperArgs": mapperArgs}]
        # Register the template repo only when it is a physically distinct
        # directory; otherwise the Butler would see the same root twice.
        if not os.path.samefile(self.dataRepo, self.templateRepo):
            butlerInputs.append({"root": self.templateRepo, "mode": "r", "mapperArgs": mapperArgs})
        butlerOutputs = [{"root": self.outputRepo, "mode": "rw", "mapperArgs": mapperArgs}]

        return dafPersist.Butler(inputs=butlerInputs, outputs=butlerOutputs)

    @property
    def analysisButler(self):
        """A Butler that can read pipeline outputs (`lsst.daf.persistence.Butler`, read-only).
        """
        if self._analysisButler is None:
            readOnlyOutput = {"root": self.outputRepo, "mode": "r"}
            self._analysisButler = dafPersist.Butler(inputs=readOnlyOutput)
        return self._analysisButler
class WorkspaceGen3(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    subdirectories for a repository and for non-repository files. Constructing
    a WorkspaceGen3 does not *initialize* its repository, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.repo)
        self.mkdir(self.pipelineDir)

        # Gen 3 name of the output CHAINED collection; must match what the
        # pipeline-run machinery uses when writing results.
        self.outputName = "ap_verify-output"

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def repo(self):
        """The absolute path/URI to a Butler repo for AP pipeline processing
        (`str`, read-only).
        """
        return os.path.join(self._location, 'repo')

    @property
    def pipelineDir(self):
        """The absolute location of a directory containing custom pipeline
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'pipelines')

    @property
    def dbLocation(self):
        # Docstring inherited from Workspace.dbLocation.
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        # Docstring inherited from Workspace.alertLocation.
        return os.path.join(self._location, 'alerts')

    def _ensureCollection(self, registry, name, collectionType):
        """Add a collection to a repository if it does not already exist.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            The repository to which to add the collection.
        name : `str`
            The name of the collection to test for and add.
        collectionType : `lsst.daf.butler.CollectionType`
            The type of collection to add. This field is ignored when
            testing if a collection exists.
        """
        # NOTE(review): ``name`` is compiled as a regular expression, so a
        # name containing regex metacharacters may match more collections
        # than intended — presumably acceptable for the names used here.
        matchingCollections = list(registry.queryCollections(re.compile(name)))
        if not matchingCollections:
            registry.registerCollection(name, type=collectionType)

    @property
    def workButler(self):
        """A Butler that can read and write to a Gen 3 repository (`lsst.daf.butler.Butler`, read-only).

        Notes
        -----
        Assumes `repo` has been initialized.

        Raises
        ------
        RuntimeError
            Raised if `repo` does not contain a valid Gen 3 repository.
        """
        if self._workButler is None:
            try:
                # Hard-code the collection names because it's hard to infer the inputs from the Butler
                queryButler = dafButler.Butler(self.repo, writeable=True)  # writeable for _workButler
                # ``inputs`` accumulates every collection the pipeline reads:
                # the skymap run plus per-instrument raws, calibs, and refcats.
                inputs = {
                    lsst.skymap.BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME,
                }
                for dimension in queryButler.registry.queryDataIds('instrument'):
                    instrument = obsBase.Instrument.fromName(dimension["instrument"], queryButler.registry)
                    rawName = instrument.makeDefaultRawIngestRunName()
                    inputs.add(rawName)
                    # The raw RUN may not exist yet (nothing ingested);
                    # register it so the chain definition below is valid.
                    self._ensureCollection(queryButler.registry, rawName, dafButler.CollectionType.RUN)
                    inputs.add(instrument.makeCalibrationCollectionName())
                    inputs.add(instrument.makeRefCatCollectionName())
                # Pick up any per-dataset template collections as well.
                inputs.update(queryButler.registry.queryCollections(re.compile(r"templates/\w+")))

                # Create an output chain here, so that workButler can see it.
                # Definition does not conflict with what pipetask --output uses.
                # Regex is workaround for DM-25945.
                if not list(queryButler.registry.queryCollections(re.compile(self.outputName))):
                    queryButler.registry.registerCollection(self.outputName,
                                                            dafButler.CollectionType.CHAINED)
                    queryButler.registry.setCollectionChain(self.outputName, inputs)

                # Share the registry/datastore with queryButler; only the
                # default collection differs.
                self._workButler = dafButler.Butler(butler=queryButler, collections=self.outputName)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._workButler

    @property
    def analysisButler(self):
        """A Butler that can read from a Gen 3 repository with outputs (`lsst.daf.butler.Butler`, read-only).

        Notes
        -----
        Assumes `repo` has been initialized.

        Raises
        ------
        RuntimeError
            Raised if `repo` does not contain a valid Gen 3 repository.
        """
        if self._analysisButler is None:
            try:
                self._analysisButler = dafButler.Butler(self.repo, collections=self.outputName,
                                                        writeable=False)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._analysisButler