Coverage for python/lsst/ap/verify/workspace.py : 42%

#
# This file is part of ap_verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import abc
import os
import pathlib
import re
import stat

import lsst.daf.persistence as dafPersist
import lsst.daf.butler as dafButler
import lsst.obs.base as obsBase


class Workspace(metaclass=abc.ABCMeta):
    """A directory used by ``ap_verify`` to handle data and outputs.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for various purposes. Subclasses are
    typically specialized for particular workflows. Keeping such details in
    separate classes makes it easier to provide guarantees without forcing
    awkward directory structures on users.

    All Workspace classes must guarantee the existence of any subdirectories
    inside the workspace. Directories corresponding to repositories do not need
    to be initialized, since creating a valid repository usually requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """
    def __init__(self, location):
        # Properties must be `str` for backwards compatibility
        self._location = str(pathlib.Path(location).resolve())

        self.mkdir(self._location)
        self.mkdir(self.configDir)

    @staticmethod
    def mkdir(directory):
        """Create a directory for the workspace.

        This method is intended to be called only by subclasses, and should
        not be used by external code.

        Parameters
        ----------
        directory : `str`
            The directory to create.
        """
        mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH  # a+r, u+rwx
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True, mode=mode)

    def __eq__(self, other):
        """Test whether two workspaces are of the same type and have the
        same location.
        """
        return type(self) == type(other) and self.workDir == other.workDir

    def __repr__(self):
        """A string representation that can be used to reconstruct the Workspace.
        """
        return f"{type(self).__name__}({self.workDir!r})"

    @property
    def workDir(self):
        """The absolute location of the workspace as a whole
        (`str`, read-only).
        """
        return self._location

    @property
    def configDir(self):
        """The absolute location of a directory containing custom Task config
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'config')

    @property
    @abc.abstractmethod
    def dbLocation(self):
        """The default absolute location of the source association database to
        be created or updated by the pipeline (`str`, read-only).

        Shall be a pathname to a database suitable for the backend of `Apdb`.
        """

    @property
    @abc.abstractmethod
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """

    @property
    @abc.abstractmethod
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs (read-only).
        The type is class-dependent.
        """

    @property
    @abc.abstractmethod
    def analysisButler(self):
        """A Butler that can read pipeline outputs (read-only).
        The type is class-dependent.

        The Butler should be read-only, if its type supports the restriction.
        """


class WorkspaceGen2(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for repositories. Constructing a
    WorkspaceGen2 does not *initialize* its repositories, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.dataRepo)
        self.mkdir(self.calibRepo)
        self.mkdir(self.templateRepo)
        self.mkdir(self.outputRepo)

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def dataRepo(self):
        """The absolute path/URI to a Butler repo for science data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'ingested')

    @property
    def calibRepo(self):
        """The absolute path/URI to a Butler repo for calibration data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'calibingested')

    @property
    def templateRepo(self):
        """The absolute path/URI to a Butler repo for precomputed templates
        (`str`, read-only).
        """
        return self.dataRepo

    @property
    def outputRepo(self):
        """The absolute path/URI to a Butler repo for AP pipeline products
        (`str`, read-only).
        """
        return os.path.join(self._location, 'output')

    @property
    def dbLocation(self):
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        return os.path.join(self._location, 'alerts')

    @property
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs
        (`lsst.daf.persistence.Butler`, read-only).
        """
        if self._workButler is None:
            self._workButler = self._makeButler()
        return self._workButler

    def _makeButler(self):
        """Create a butler for accessing the entire workspace.

        Returns
        -------
        butler : `lsst.daf.persistence.Butler`
            A butler accepting `dataRepo`, `calibRepo`, and `templateRepo` as
            inputs, and `outputRepo` as an output.

        Notes
        -----
        Assumes all `*Repo` properties have been initialized.
        """
        # common arguments for butler elements
        mapperArgs = {"calibRoot": os.path.abspath(self.calibRepo)}

        inputs = [{"root": self.dataRepo, "mapperArgs": mapperArgs}]
        outputs = [{"root": self.outputRepo, "mode": "rw", "mapperArgs": mapperArgs}]

        if not os.path.samefile(self.dataRepo, self.templateRepo):
            inputs.append({'root': self.templateRepo, 'mode': 'r', 'mapperArgs': mapperArgs})

        return dafPersist.Butler(inputs=inputs, outputs=outputs)

    @property
    def analysisButler(self):
        """A Butler that can read pipeline outputs (`lsst.daf.persistence.Butler`, read-only).
        """
        if self._analysisButler is None:
            self._analysisButler = dafPersist.Butler(inputs={"root": self.outputRepo, "mode": "r"})
        return self._analysisButler
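
# Illustrative usage sketch (not part of this module): after ap_verify has
# ingested a dataset into the Gen 2 repositories, client code might read and
# write data through the two butlers. The path "/scratch/ws_gen2" and the
# dataId values are hypothetical.
#
#     ws = WorkspaceGen2("/scratch/ws_gen2")
#     calexp = ws.workButler.get("calexp", dataId={"visit": 123, "ccdnum": 42})
#     diaSrc = ws.analysisButler.get("deepDiff_diaSrc", dataId={"visit": 123, "ccdnum": 42})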


class WorkspaceGen3(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    subdirectories for a repository and for non-repository files. Constructing
    a WorkspaceGen3 does not *initialize* its repository, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.repo)

        # Gen 3 name of the output
        self.outputName = "ap_verify-output"

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def repo(self):
        """The absolute path/URI to a Butler repo for AP pipeline processing
        (`str`, read-only).
        """
        return os.path.join(self._location, 'repo')

    @property
    def dbLocation(self):
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        return os.path.join(self._location, 'alerts')

    def _ensureCollection(self, registry, name, collectionType):
        """Add a collection to a repository if it does not already exist.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            The repository to which to add the collection.
        name : `str`
            The name of the collection to test for and add.
        collectionType : `lsst.daf.butler.CollectionType`
            The type of collection to add. This field is ignored when
            testing if a collection exists.
        """
        matchingCollections = list(registry.queryCollections(re.compile(name)))
        if not matchingCollections:
            registry.registerCollection(name, type=collectionType)

    @property
    def workButler(self):
        """A Butler that can read and write to a Gen 3 repository (`lsst.daf.butler.Butler`, read-only).

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._workButler is None:
            try:
                # Hard-code the collection names because it's hard to infer the inputs from the Butler
                queryButler = dafButler.Butler(self.repo, writeable=True)  # writeable for _workButler
                inputs = {"skymaps", "refcats"}
                for dimension in queryButler.registry.queryDataIds('instrument'):
                    instrument = obsBase.Instrument.fromName(dimension["instrument"], queryButler.registry)
                    rawName = instrument.makeDefaultRawIngestRunName()
                    inputs.add(rawName)
                    self._ensureCollection(queryButler.registry, rawName, dafButler.CollectionType.RUN)
                    inputs.add(instrument.makeCalibrationCollectionName())
                inputs.update(queryButler.registry.queryCollections(re.compile(r"templates/\w+")))

                # Create an output chain here, so that workButler can see it.
                # Definition does not conflict with what pipetask --output uses.
                # Regex is workaround for DM-25945.
                if not list(queryButler.registry.queryCollections(re.compile(self.outputName))):
                    queryButler.registry.registerCollection(self.outputName,
                                                            dafButler.CollectionType.CHAINED)
                    queryButler.registry.setCollectionChain(self.outputName, inputs)

                self._workButler = dafButler.Butler(butler=queryButler, collections=self.outputName)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._workButler

    @property
    def analysisButler(self):
        """A Butler that can read from a Gen 3 repository with outputs (`lsst.daf.butler.Butler`, read-only).

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._analysisButler is None:
            try:
                self._analysisButler = dafButler.Butler(self.repo, collections=self.outputName,
                                                        writeable=False)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._analysisButler
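
# Illustrative usage sketch (not part of this module): once the Gen 3
# repository under ``repo`` has been created and populated (for example by
# ap_verify's dataset ingestion step), the butlers can be used as below.
# The path "/scratch/ws_gen3" and the dataId values are hypothetical.
#
#     ws = WorkspaceGen3("/scratch/ws_gen3")
#     print(list(ws.workButler.registry.queryCollections()))   # inspect the chained inputs/outputs
#     calexp = ws.analysisButler.get("calexp",
#                                    dataId={"instrument": "DECam", "visit": 123, "detector": 42})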