Coverage for python/lsst/ap/verify/workspace.py: 43%
#
# This file is part of ap_verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

__all__ = ["Workspace", "WorkspaceGen2", "WorkspaceGen3"]

import abc
import os
import pathlib
import re
import stat

import lsst.skymap
import lsst.daf.persistence as dafPersist
import lsst.daf.butler as dafButler
import lsst.obs.base as obsBase


class Workspace(metaclass=abc.ABCMeta):
    """A directory used by ``ap_verify`` to handle data and outputs.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for various purposes. Subclasses are
    typically specialized for particular workflows. Keeping such details in
    separate classes makes it easier to provide guarantees without forcing
    awkward directory structures on users.

    All Workspace classes must guarantee the existence of any subdirectories
    inside the workspace. Directories corresponding to repositories do not need
    to be initialized, since creating a valid repository usually requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        # Properties must be `str` for backwards compatibility
        self._location = str(pathlib.Path(location).resolve())

        self.mkdir(self._location)
        self.mkdir(self.configDir)

    @staticmethod
    def mkdir(directory):
        """Create a directory for the workspace.

        This method is intended to be called only by subclasses, and should
        not be used by external code.

        Parameters
        ----------
        directory : `str`
            The directory to create.
        """
        mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH  # a+r, u+rwx
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True, mode=mode)

    def __eq__(self, other):
        """Test whether two workspaces are of the same type and have the
        same location.
        """
        return type(self) == type(other) and self.workDir == other.workDir

    def __repr__(self):
        """A string representation that can be used to reconstruct the Workspace.
        """
        return f"{type(self).__name__}({self.workDir!r})"

    @property
    def workDir(self):
        """The absolute location of the workspace as a whole
        (`str`, read-only).
        """
        return self._location

    @property
    def configDir(self):
        """The absolute location of a directory containing custom Task config
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'config')

    @property
    @abc.abstractmethod
    def dbLocation(self):
        """The default absolute location of the source association database to
        be created or updated by the pipeline (`str`, read-only).

        Shall be a pathname to a database suitable for the backend of `Apdb`.
        """

    @property
    @abc.abstractmethod
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """

    @property
    @abc.abstractmethod
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs (read-only).
        The type is class-dependent.
        """

    @property
    @abc.abstractmethod
    def analysisButler(self):
        """A Butler that can read pipeline outputs (read-only).
        The type is class-dependent.

        The Butler should be read-only, if its type supports the restriction.
        """
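# Illustrative usage sketch (comment only; not part of the module). Assuming a
# concrete subclass such as WorkspaceGen3, defined below, and a hypothetical
# location:
#
#     ws = WorkspaceGen3("/tmp/ap_verify_demo")   # creates the directory tree
#     ws.workDir      # absolute, resolved path of the workspace
#     ws.configDir    # '<workDir>/config', guaranteed to exist
#     ws == WorkspaceGen3("/tmp/ap_verify_demo")  # True: same type and location
#     repr(ws)        # "WorkspaceGen3('/tmp/ap_verify_demo')" (with the path resolved)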
class WorkspaceGen2(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for repositories. Constructing a
    WorkspaceGen2 does not *initialize* its repositories, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.dataRepo)
        self.mkdir(self.calibRepo)
        self.mkdir(self.templateRepo)
        self.mkdir(self.outputRepo)

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def dataRepo(self):
        """The absolute path/URI to a Butler repo for science data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'ingested')

    @property
    def calibRepo(self):
        """The absolute path/URI to a Butler repo for calibration data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'calibingested')

    @property
    def templateRepo(self):
        """The absolute path/URI to a Butler repo for precomputed templates
        (`str`, read-only).
        """
        return self.dataRepo

    @property
    def outputRepo(self):
        """The absolute path/URI to a Butler repo for AP pipeline products
        (`str`, read-only).
        """
        return os.path.join(self._location, 'output')

    @property
    def dbLocation(self):
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        return os.path.join(self._location, 'alerts')

    @property
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs
        (`lsst.daf.persistence.Butler`, read-only).
        """
        if self._workButler is None:
            self._workButler = self._makeButler()
        return self._workButler

    def _makeButler(self):
        """Create a butler for accessing the entire workspace.

        Returns
        -------
        butler : `lsst.daf.persistence.Butler`
            A butler accepting `dataRepo`, `calibRepo`, and `templateRepo` as
            inputs, and `outputRepo` as an output.

        Notes
        -----
        Assumes all `*Repo` properties have been initialized.
        """
        # common arguments for butler elements
        mapperArgs = {"calibRoot": os.path.abspath(self.calibRepo)}

        inputs = [{"root": self.dataRepo, "mapperArgs": mapperArgs}]
        outputs = [{"root": self.outputRepo, "mode": "rw", "mapperArgs": mapperArgs}]

        if not os.path.samefile(self.dataRepo, self.templateRepo):
            inputs.append({'root': self.templateRepo, 'mode': 'r', 'mapperArgs': mapperArgs})

        return dafPersist.Butler(inputs=inputs, outputs=outputs)

    @property
    def analysisButler(self):
        """A Butler that can read pipeline outputs (`lsst.daf.persistence.Butler`, read-only).
        """
        if self._analysisButler is None:
            self._analysisButler = dafPersist.Butler(inputs={"root": self.outputRepo, "mode": "r"})
        return self._analysisButler
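# Illustrative usage sketch (comment only; not part of the module), assuming a
# Gen 2 dataset has already been ingested into a hypothetical workspace:
#
#     ws = WorkspaceGen2("/path/to/workspace")
#     ws.dataRepo                  # '<workDir>/ingested'
#     ws.calibRepo                 # '<workDir>/calibingested'
#     ws.templateRepo              # same directory as dataRepo in this implementation
#     butler = ws.workButler       # reads data/calib/template repos, writes outputRepo
#     results = ws.analysisButler  # read-only view of outputRepo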
class WorkspaceGen3(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    subdirectories for a repository and for non-repository files. Constructing
    a WorkspaceGen3 does not *initialize* its repository, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.repo)

        # Gen 3 name of the output
        self.outputName = "ap_verify-output"

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def repo(self):
        """The absolute path/URI to a Butler repo for AP pipeline processing
        (`str`, read-only).
        """
        return os.path.join(self._location, 'repo')

    @property
    def dbLocation(self):
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        return os.path.join(self._location, 'alerts')

    def _ensureCollection(self, registry, name, collectionType):
        """Add a collection to a repository if it does not already exist.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            The repository to which to add the collection.
        name : `str`
            The name of the collection to test for and add.
        collectionType : `lsst.daf.butler.CollectionType`
            The type of collection to add. This field is ignored when
            testing if a collection exists.
        """
        matchingCollections = list(registry.queryCollections(re.compile(name)))
        if not matchingCollections:
            registry.registerCollection(name, type=collectionType)

    @property
    def workButler(self):
        """A Butler that can read and write to a Gen 3 repository (`lsst.daf.butler.Butler`, read-only).

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._workButler is None:
            try:
                # Hard-code the collection names because it's hard to infer the inputs from the Butler
                queryButler = dafButler.Butler(self.repo, writeable=True)  # writeable for _workButler
                inputs = {
                    lsst.skymap.BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME,
                }
                for dimension in queryButler.registry.queryDataIds('instrument'):
                    instrument = obsBase.Instrument.fromName(dimension["instrument"], queryButler.registry)
                    rawName = instrument.makeDefaultRawIngestRunName()
                    inputs.add(rawName)
                    self._ensureCollection(queryButler.registry, rawName, dafButler.CollectionType.RUN)
                    inputs.add(instrument.makeCalibrationCollectionName())
                    inputs.add(instrument.makeRefCatCollectionName())
                inputs.update(queryButler.registry.queryCollections(re.compile(r"templates/\w+")))

                # Create an output chain here, so that workButler can see it.
                # Definition does not conflict with what pipetask --output uses.
                # Regex is workaround for DM-25945.
                if not list(queryButler.registry.queryCollections(re.compile(self.outputName))):
                    queryButler.registry.registerCollection(self.outputName,
                                                            dafButler.CollectionType.CHAINED)
                    queryButler.registry.setCollectionChain(self.outputName, inputs)

                self._workButler = dafButler.Butler(butler=queryButler, collections=self.outputName)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._workButler

    @property
    def analysisButler(self):
        """A Butler that can read from a Gen 3 repository with outputs (`lsst.daf.butler.Butler`, read-only).

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._analysisButler is None:
            try:
                self._analysisButler = dafButler.Butler(self.repo, collections=self.outputName,
                                                        writeable=False)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._analysisButler
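# Illustrative usage sketch (comment only; not part of the module), assuming
# `repo` already holds an initialized Gen 3 butler repository with ingested
# raws, calibs, and templates:
#
#     ws = WorkspaceGen3("/path/to/workspace")   # hypothetical location
#     ws.repo                      # '<workDir>/repo'
#     ws.dbLocation                # '<workDir>/association.db'
#     butler = ws.workButler       # writeable Butler over the 'ap_verify-output' chain
#     results = ws.analysisButler  # read-only Butler over the same chain
#
# Both butler properties raise RuntimeError if `repo` does not contain a valid
# Gen 3 repository.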