Coverage for python/lsst/ap/verify/workspace.py: 42%

#
# This file is part of ap_verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

import abc
import os
import pathlib
import re
import stat

import lsst.skymap
import lsst.daf.persistence as dafPersist
import lsst.daf.butler as dafButler
import lsst.obs.base as obsBase


class Workspace(metaclass=abc.ABCMeta):
    """A directory used by ``ap_verify`` to handle data and outputs.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for various purposes. Subclasses are
    typically specialized for particular workflows. Keeping such details in
    separate classes makes it easier to provide guarantees without forcing
    awkward directory structures on users.

    All Workspace classes must guarantee the existence of any subdirectories
    inside the workspace. Directories corresponding to repositories do not need
    to be initialized, since creating a valid repository usually requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable.
    """
    def __init__(self, location):
        # Properties must be `str` for backwards compatibility
        self._location = str(pathlib.Path(location).resolve())

        self.mkdir(self._location)
        self.mkdir(self.configDir)

    @staticmethod
    def mkdir(directory):
        """Create a directory for the workspace.

        This method is intended to be called only by subclasses, and should
        not be used by external code.

        Parameters
        ----------
        directory : `str`
            The directory to create.
        """
        mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH  # a+r, u+rwx
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True, mode=mode)

    def __eq__(self, other):
        """Test whether two workspaces are of the same type and have the
        same location.
        """
        return type(self) == type(other) and self.workDir == other.workDir

    def __repr__(self):
        """A string representation that can be used to reconstruct the Workspace.
        """
        return f"{type(self).__name__}({self.workDir!r})"

    @property
    def workDir(self):
        """The absolute location of the workspace as a whole
        (`str`, read-only).
        """
        return self._location

    @property
    def configDir(self):
        """The absolute location of a directory containing custom Task config
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'config')

    @property
    @abc.abstractmethod
    def dbLocation(self):
        """The default absolute location of the source association database to
        be created or updated by the pipeline (`str`, read-only).

        Shall be a pathname to a database suitable for the backend of `Apdb`.
        """

    @property
    @abc.abstractmethod
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """

    @property
    @abc.abstractmethod
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs (read-only).
        The type is class-dependent.
        """

    @property
    @abc.abstractmethod
    def analysisButler(self):
        """A Butler that can read pipeline outputs (read-only).
        The type is class-dependent.

        The Butler should be read-only, if its type supports the restriction.
        """
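

# Illustrative sketch (not part of the original module): the minimum a
# concrete Workspace subclass must provide is implementations of the four
# abstract properties declared above; ``mkdir``, ``configDir``, ``__eq__``,
# and ``__repr__`` are inherited. The class name ``_ExampleWorkspace`` is
# hypothetical; the real subclasses follow below.
class _ExampleWorkspace(Workspace):
    @property
    def dbLocation(self):
        return os.path.join(self.workDir, 'association.db')

    @property
    def alertLocation(self):
        return os.path.join(self.workDir, 'alerts')

    @property
    def workButler(self):
        # A real subclass would construct a Butler here; see WorkspaceGen2
        # and WorkspaceGen3 below.
        raise NotImplementedError

    @property
    def analysisButler(self):
        raise NotImplementedError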


class WorkspaceGen2(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for repositories. Constructing a
    WorkspaceGen2 does not *initialize* its repositories, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable.
    """
    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.dataRepo)
        self.mkdir(self.calibRepo)
        self.mkdir(self.templateRepo)
        self.mkdir(self.outputRepo)

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def dataRepo(self):
        """The absolute path/URI to a Butler repo for science data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'ingested')

    @property
    def calibRepo(self):
        """The absolute path/URI to a Butler repo for calibration data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'calibingested')

    @property
    def templateRepo(self):
        """The absolute path/URI to a Butler repo for precomputed templates
        (`str`, read-only).
        """
        return self.dataRepo

    @property
    def outputRepo(self):
        """The absolute path/URI to a Butler repo for AP pipeline products
        (`str`, read-only).
        """
        return os.path.join(self._location, 'output')

    @property
    def dbLocation(self):
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        return os.path.join(self._location, 'alerts')

    @property
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs
        (`lsst.daf.persistence.Butler`, read-only).
        """
        if self._workButler is None:
            self._workButler = self._makeButler()
        return self._workButler

    def _makeButler(self):
        """Create a butler for accessing the entire workspace.

        Returns
        -------
        butler : `lsst.daf.persistence.Butler`
            A butler accepting `dataRepo`, `calibRepo`, and `templateRepo` as
            inputs, and `outputRepo` as an output.

        Notes
        -----
        Assumes all `*Repo` properties have been initialized.
        """
        # common arguments for butler elements
        mapperArgs = {"calibRoot": os.path.abspath(self.calibRepo)}

        inputs = [{"root": self.dataRepo, "mapperArgs": mapperArgs}]
        outputs = [{"root": self.outputRepo, "mode": "rw", "mapperArgs": mapperArgs}]

        if not os.path.samefile(self.dataRepo, self.templateRepo):
            inputs.append({'root': self.templateRepo, 'mode': 'r', 'mapperArgs': mapperArgs})

        return dafPersist.Butler(inputs=inputs, outputs=outputs)

    @property
    def analysisButler(self):
        """A Butler that can read pipeline outputs (`lsst.daf.persistence.Butler`, read-only).
        """
        if self._analysisButler is None:
            self._analysisButler = dafPersist.Butler(inputs={"root": self.outputRepo, "mode": "r"})
        return self._analysisButler
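

# Illustrative usage sketch (not part of the original module). The path
# "/path/to/workspace" is a placeholder, and the sketch assumes the Gen 2
# repositories have already been ingested by ap_verify.
def _exampleGen2Usage():
    workspace = WorkspaceGen2("/path/to/workspace")
    # The repository subdirectories exist after construction, but are only
    # valid Butler repositories once data have been ingested into them.
    print(workspace.dataRepo, workspace.calibRepo, workspace.outputRepo)
    # Both butlers are constructed lazily on first access.
    workButler = workspace.workButler          # read/write, all repos
    analysisButler = workspace.analysisButler  # read-only, outputs only
    return workButler, analysisButler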


class WorkspaceGen3(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    subdirectories for a repository and for non-repository files. Constructing
    a WorkspaceGen3 does not *initialize* its repository, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable.
    """
    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.repo)

        # Gen 3 name of the output
        self.outputName = "ap_verify-output"

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def repo(self):
        """The absolute path/URI to a Butler repo for AP pipeline processing
        (`str`, read-only).
        """
        return os.path.join(self._location, 'repo')

    @property
    def dbLocation(self):
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        return os.path.join(self._location, 'alerts')

    def _ensureCollection(self, registry, name, collectionType):
        """Add a collection to a repository if it does not already exist.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            The repository to which to add the collection.
        name : `str`
            The name of the collection to test for and add.
        collectionType : `lsst.daf.butler.CollectionType`
            The type of collection to add. This field is ignored when
            testing if a collection exists.
        """
        matchingCollections = list(registry.queryCollections(re.compile(name)))
        if not matchingCollections:
            registry.registerCollection(name, type=collectionType)

    @property
    def workButler(self):
        """A Butler that can read and write to a Gen 3 repository (`lsst.daf.butler.Butler`, read-only).

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._workButler is None:
            try:
                # Hard-code the collection names because it's hard to infer the inputs from the Butler
                queryButler = dafButler.Butler(self.repo, writeable=True)  # writeable for _workButler
                inputs = {
                    lsst.skymap.BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME,
                }
                for dimension in queryButler.registry.queryDataIds('instrument'):
                    instrument = obsBase.Instrument.fromName(dimension["instrument"], queryButler.registry)
                    rawName = instrument.makeDefaultRawIngestRunName()
                    inputs.add(rawName)
                    self._ensureCollection(queryButler.registry, rawName, dafButler.CollectionType.RUN)
                    inputs.add(instrument.makeCalibrationCollectionName())
                    inputs.add(instrument.makeRefCatCollectionName())
                inputs.update(queryButler.registry.queryCollections(re.compile(r"templates/\w+")))

                # Create an output chain here, so that workButler can see it.
                # Definition does not conflict with what pipetask --output uses.
                # Regex is workaround for DM-25945.
                if not list(queryButler.registry.queryCollections(re.compile(self.outputName))):
                    queryButler.registry.registerCollection(self.outputName,
                                                            dafButler.CollectionType.CHAINED)
                    queryButler.registry.setCollectionChain(self.outputName, inputs)

                self._workButler = dafButler.Butler(butler=queryButler, collections=self.outputName)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._workButler

    @property
    def analysisButler(self):
        """A Butler that can read from a Gen 3 repository with outputs (`lsst.daf.butler.Butler`, read-only).

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._analysisButler is None:
            try:
                self._analysisButler = dafButler.Butler(self.repo, collections=self.outputName,
                                                        writeable=False)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._analysisButler
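

# Illustrative usage sketch (not part of the original module). The path
# "/path/to/workspace" is a placeholder, and the sketch assumes
# ``workspace.repo`` already contains an initialized Gen 3 repository with
# raw, calibration, refcat, skymap, and template collections.
def _exampleGen3Usage():
    workspace = WorkspaceGen3("/path/to/workspace")
    # First access to workButler registers the "ap_verify-output" chained
    # collection (if it does not already exist) over the input collections,
    # then returns a writeable Butler pointed at that chain.
    workButler = workspace.workButler
    # analysisButler reads the same chain but is opened read-only.
    analysisButler = workspace.analysisButler
    return workButler, analysisButler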