Coverage for python/lsst/ap/verify/workspace.py : 46%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
#
# This file is part of ap_verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
24import abc
25import os
26import pathlib
27import re
28import stat
30import lsst.daf.persistence as dafPersist
31import lsst.daf.butler as dafButler
32import lsst.obs.base as obsBase
class Workspace(metaclass=abc.ABCMeta):
    """A directory used by ``ap_verify`` to handle data and outputs.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for various purposes. Subclasses are
    typically specialized for particular workflows. Keeping such details in
    separate classes makes it easier to provide guarantees without forcing
    awkward directory structures on users.

    All Workspace classes must guarantee the existence of any subdirectories
    inside the workspace. Directories corresponding to repositories do not need
    to be initialized, since creating a valid repository usually requires
    external information.

    Two workspaces compare equal (and hash equally) when they are of exactly
    the same class and share the same `workDir`.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        # Properties must be `str` for backwards compatibility
        self._location = str(pathlib.Path(location).resolve())

        # The workspace root must exist before any of its subdirectories.
        self.mkdir(self._location)
        self.mkdir(self.configDir)

    @staticmethod
    def mkdir(directory):
        """Create a directory for the workspace.

        Missing parent directories are created as well, and a directory that
        already exists is left alone.

        This method is intended to be called only by subclasses, and should
        not be used by external code.

        Parameters
        ----------
        directory : `str`
            The directory to create.
        """
        mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH  # a+r, u+rwx
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True, mode=mode)

    def __eq__(self, other):
        """Test whether two workspaces are of the same type and have the
        same location.
        """
        # Exact type match on purpose: workspaces of different classes lay
        # out their directories differently even at the same location.
        return type(self) is type(other) and self.workDir == other.workDir

    def __hash__(self):
        """Hash on the same fields that `__eq__` compares.

        Defining ``__eq__`` alone would leave instances unhashable; both
        fields used here are fixed after construction.
        """
        return hash((type(self), self.workDir))

    def __repr__(self):
        """A string representation that can be used to reconstruct the Workspace.
        """
        return f"{type(self).__name__}({self.workDir!r})"

    @property
    def workDir(self):
        """The absolute location of the workspace as a whole
        (`str`, read-only).
        """
        return self._location

    @property
    def configDir(self):
        """The absolute location of a directory containing custom Task config
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'config')

    @property
    @abc.abstractmethod
    def dbLocation(self):
        """The default absolute location of the source association database to
        be created or updated by the pipeline (`str`, read-only).

        Shall be a pathname to a database suitable for the backend of `Apdb`.
        """

    @property
    @abc.abstractmethod
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """

    @property
    @abc.abstractmethod
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs (read-only).
        The type is class-dependent.
        """

    @property
    @abc.abstractmethod
    def analysisButler(self):
        """A Butler that can read pipeline outputs (read-only).
        The type is class-dependent.

        The Butler should be read-only, if its type supports the restriction.
        """
class WorkspaceGen2(Workspace):
    """A workspace laid out for Gen 2 Butler repositories.

    The working directory holds one (possibly empty) subdirectory per
    repository. Building a WorkspaceGen2 creates those directories but does
    not *initialize* the repositories in them, because that needs
    information the workspace does not have.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        # Every repository directory must exist, even if still empty.
        for repoDir in (self.dataRepo, self.calibRepo, self.templateRepo, self.outputRepo):
            self.mkdir(repoDir)

        # Butlers are built on first access and then cached.
        self._workButler = None
        self._analysisButler = None

    @property
    def dataRepo(self):
        """Absolute path/URI of the Butler repo holding science data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'ingested')

    @property
    def calibRepo(self):
        """Absolute path/URI of the Butler repo holding calibration data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'calibingested')

    @property
    def templateRepo(self):
        """Absolute path/URI of the Butler repo holding precomputed templates
        (`str`, read-only).

        Currently the same repository as `dataRepo`.
        """
        return self.dataRepo

    @property
    def outputRepo(self):
        """Absolute path/URI of the Butler repo receiving AP pipeline products
        (`str`, read-only).
        """
        return os.path.join(self._location, 'output')

    @property
    def dbLocation(self):
        """Default absolute path of the source association database
        (`str`, read-only).
        """
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        """Absolute path of the output directory for persisted alert packets
        (`str`, read-only).
        """
        return os.path.join(self._location, 'alerts')

    @property
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs
        (`lsst.daf.persistence.Butler`, read-only).
        """
        butler = self._workButler
        if butler is None:
            butler = self._workButler = self._makeButler()
        return butler

    def _makeButler(self):
        """Create a butler for accessing the entire workspace.

        Returns
        -------
        butler : `lsst.daf.persistence.Butler`
            A butler accepting `dataRepo`, `calibRepo`, and `templateRepo` as
            inputs, and `outputRepo` as an output.

        Notes
        -----
        Assumes all `*Repo` properties have been initialized.
        """
        # Mapper arguments shared by every repository entry.
        calibArgs = {"calibRoot": os.path.abspath(self.calibRepo)}

        inputRepos = [{"root": self.dataRepo, "mapperArgs": calibArgs}]
        # Templates only need their own entry when stored somewhere other
        # than the science-data repository.
        if not os.path.samefile(self.dataRepo, self.templateRepo):
            inputRepos.append({'root': self.templateRepo, 'mode': 'r', 'mapperArgs': calibArgs})

        outputRepos = [{"root": self.outputRepo, "mode": "rw", "mapperArgs": calibArgs}]

        return dafPersist.Butler(inputs=inputRepos, outputs=outputRepos)

    @property
    def analysisButler(self):
        """A Butler that can read pipeline outputs
        (`lsst.daf.persistence.Butler`, read-only).
        """
        butler = self._analysisButler
        if butler is None:
            butler = self._analysisButler = dafPersist.Butler(
                inputs={"root": self.outputRepo, "mode": "r"})
        return butler
class WorkspaceGen3(Workspace):
    """A workspace laid out for a single Gen 3 Butler repository.

    The working directory holds one subdirectory for the repository plus
    locations for non-repository files. Building a WorkspaceGen3 creates
    the repository directory but does not *initialize* the repository,
    because that needs information the workspace does not have.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.repo)

        # Gen 3 name of the output run
        self.runName = "ap_verify-output"

        # Butlers are built on first access and then cached.
        self._workButler = None
        self._analysisButler = None

    @property
    def repo(self):
        """Absolute path/URI of the Butler repo used for AP pipeline
        processing (`str`, read-only).
        """
        return os.path.join(self._location, 'repo')

    @property
    def dbLocation(self):
        """Default absolute path of the source association database
        (`str`, read-only).
        """
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        """Absolute path of the output directory for persisted alert packets
        (`str`, read-only).
        """
        return os.path.join(self._location, 'alerts')

    @property
    def workButler(self):
        """A Butler that can read and write to a Gen 3 repository
        (`lsst.daf.butler.Butler`, read-only).

        Notes
        -----
        Assumes `repo` has been initialized.

        Raises
        ------
        RuntimeError
            Raised if an `OSError` occurs while building the Butler; it is
            reported as `repo` not being a Gen 3 repository.
        """
        if self._workButler is not None:
            return self._workButler

        try:
            # Hard-code the collection names because it's hard to infer the inputs from the Butler
            queryButler = dafButler.Butler(self.repo, writeable=True)  # writeable for _workButler
            registry = queryButler.registry
            collections = {"skymaps", "refcats"}
            # Each registered instrument contributes its raw and calibration collections.
            for dataId in registry.queryDataIds('instrument'):
                instrument = obsBase.Instrument.fromName(dataId["instrument"], registry)
                collections.update((instrument.makeDefaultRawIngestRunName(),
                                    instrument.makeCalibrationCollectionName()))
            collections.update(registry.queryCollections(re.compile(r"templates/\w+")))

            # should set run=self.runName, but this breaks quantum graph generation (DM-26246)
            self._workButler = dafButler.Butler(butler=queryButler, collections=collections)
        except OSError as e:
            raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._workButler

    @property
    def analysisButler(self):
        """A Butler that can read from a Gen 3 repository with outputs
        (`lsst.daf.butler.Butler`, read-only).

        Notes
        -----
        Assumes `repo` has been initialized.

        Raises
        ------
        RuntimeError
            Raised if an `OSError` occurs while building the Butler; it is
            reported as `repo` not being a Gen 3 repository.
        """
        if self._analysisButler is not None:
            return self._analysisButler

        try:
            self._analysisButler = dafButler.Butler(self.repo, collections=self.runName,
                                                    writeable=False)
        except OSError as e:
            raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._analysisButler