Coverage for python/lsst/ap/verify/workspace.py : 36%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# This file is part of ap_verify.
3#
4# Developed for the LSST Data Management System.
5# This product includes software developed by the LSST Project
6# (http://www.lsst.org).
7# See the COPYRIGHT file at the top-level directory of this distribution
8# for details of code ownership.
9#
10# This program is free software: you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation, either version 3 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program. If not, see <http://www.gnu.org/licenses/>.
22#
24import abc
25import os
26import pathlib
27import stat
29import lsst.daf.persistence as dafPersist
30import lsst.daf.butler as dafButler
31import lsst.obs.base as obsBase
class Workspace(metaclass=abc.ABCMeta):
    """A directory used by ``ap_verify`` to handle data and outputs.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for various purposes. Subclasses are
    typically specialized for particular workflows. Keeping such details in
    separate classes makes it easier to provide guarantees without forcing
    awkward directory structures on users.

    All Workspace classes must guarantee the existence of any subdirectories
    inside the workspace. Directories corresponding to repositories do not need
    to be initialized, since creating a valid repository usually requires
    external information.

    Workspaces compare equal when they have the same concrete type and the
    same `workDir`; they are hashable consistently with that definition, so
    they can be stored in sets or used as dictionary keys.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    OSError
        Raised if ``location`` or its ``config`` subdirectory cannot be
        created (``EnvironmentError`` is an alias of `OSError`).
    """

    def __init__(self, location):
        # Properties must be `str` for backwards compatibility
        self._location = str(pathlib.Path(location).resolve())

        self.mkdir(self._location)
        self.mkdir(self.configDir)

    @staticmethod
    def mkdir(directory):
        """Create a directory for the workspace.

        This method is intended to be called only by subclasses, and should
        not be used by external code.

        Missing parent directories are created as well, and an already
        existing directory is left untouched.

        Parameters
        ----------
        directory : `str`
            The directory to create.
        """
        # Requested permissions for the leaf directory: a+r, u+rwx
        mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True, mode=mode)

    def __eq__(self, other):
        """Test whether two workspaces are of the same type and have the
        same location.
        """
        # Exact type match on purpose: a Gen 2 and a Gen 3 workspace at the
        # same location are not interchangeable.
        return type(self) is type(other) and self.workDir == other.workDir

    def __hash__(self):
        """Hash on the same (type, location) pair that `__eq__` compares.

        Defining ``__eq__`` alone would make instances unhashable.
        """
        return hash((type(self), self.workDir))

    def __repr__(self):
        """A string representation that can be used to reconstruct the Workspace.
        """
        return f"{type(self).__name__}({self.workDir!r})"

    @property
    def workDir(self):
        """The absolute location of the workspace as a whole
        (`str`, read-only).
        """
        return self._location

    @property
    def configDir(self):
        """The absolute location of a directory containing custom Task config
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'config')

    @property
    @abc.abstractmethod
    def dbLocation(self):
        """The default absolute location of the source association database to
        be created or updated by the pipeline (`str`, read-only).

        Shall be a pathname to a database suitable for the backend of `Apdb`.
        """

    @property
    @abc.abstractmethod
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """

    @property
    @abc.abstractmethod
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs (read-only).
        The type is class-dependent.
        """

    @property
    @abc.abstractmethod
    def analysisButler(self):
        """A Butler that can read pipeline outputs (read-only).
        The type is class-dependent.

        The Butler should be read-only, if its type supports the restriction.
        """
class WorkspaceGen2(Workspace):
    """A workspace laid out for Gen 2 (``lsst.daf.persistence``) processing.

    The working directory holds one subdirectory per repository; these
    subdirectories may be empty. Constructing a WorkspaceGen2 creates the
    directories but does not *initialize* the repositories inside them, as
    this requires external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        # Make sure every repository directory exists up front.
        for repoDir in (self.dataRepo, self.calibRepo, self.templateRepo, self.outputRepo):
            self.mkdir(repoDir)

        # Butlers are built on first access and then cached.
        self._workButler = None
        self._analysisButler = None

    @property
    def dataRepo(self):
        """The absolute path/URI to a Butler repo for science data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'ingested')

    @property
    def calibRepo(self):
        """The absolute path/URI to a Butler repo for calibration data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'calibingested')

    @property
    def templateRepo(self):
        """The absolute path/URI to a Butler repo for precomputed templates
        (`str`, read-only).

        Currently the same location as `dataRepo`.
        """
        return self.dataRepo

    @property
    def outputRepo(self):
        """The absolute path/URI to a Butler repo for AP pipeline products
        (`str`, read-only).
        """
        return os.path.join(self._location, 'output')

    @property
    def dbLocation(self):
        """The default absolute location of the source association database
        (`str`, read-only).
        """
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """
        return os.path.join(self._location, 'alerts')

    @property
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs
        (`lsst.daf.persistence.Butler`, read-only).

        The Butler is created on first access and reused afterwards.
        """
        butler = self._workButler
        if butler is None:
            butler = self._workButler = self._makeButler()
        return butler

    def _makeButler(self):
        """Create a butler for accessing the entire workspace.

        Returns
        -------
        butler : `lsst.daf.persistence.Butler`
            A butler accepting `dataRepo`, `calibRepo`, and `templateRepo` as
            inputs, and `outputRepo` as an output.

        Notes
        -----
        Assumes all `*Repo` properties have been initialized.
        """
        # Mapper arguments shared by every repository entry
        calibArgs = {"calibRoot": os.path.abspath(self.calibRepo)}

        inputRepos = [{"root": self.dataRepo, "mapperArgs": calibArgs}]
        # Only list the template repo separately if it is a distinct
        # location from the data repo.
        if not os.path.samefile(self.dataRepo, self.templateRepo):
            inputRepos.append({'root': self.templateRepo, 'mode': 'r', 'mapperArgs': calibArgs})

        outputRepos = [{"root": self.outputRepo, "mode": "rw", "mapperArgs": calibArgs}]

        return dafPersist.Butler(inputs=inputRepos, outputs=outputRepos)

    @property
    def analysisButler(self):
        """A Butler that can read pipeline outputs
        (`lsst.daf.persistence.Butler`, read-only).

        The Butler is created on first access and reused afterwards.
        """
        butler = self._analysisButler
        if butler is None:
            butler = dafPersist.Butler(inputs={"root": self.outputRepo, "mode": "r"})
            self._analysisButler = butler
        return butler
class WorkspaceGen3(Workspace):
    """A workspace laid out for Gen 3 (``lsst.daf.butler``) processing.

    The working directory holds a single repository subdirectory alongside
    non-repository files. Constructing a WorkspaceGen3 creates the repository
    directory but does not *initialize* the repository, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.repo)

        # Gen 3 name of the output run
        self.runName = "ap_verify-output"

        # Butlers are built on first access and then cached.
        self._workButler = None
        self._analysisButler = None

    @property
    def repo(self):
        """The absolute path/URI to a Butler repo for AP pipeline processing
        (`str`, read-only).
        """
        return os.path.join(self._location, 'repo')

    @property
    def dbLocation(self):
        """The default absolute location of the source association database
        (`str`, read-only).
        """
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """
        return os.path.join(self._location, 'alerts')

    @property
    def workButler(self):
        """A Butler that can read and write to a Gen 3 repository
        (`lsst.daf.butler.Butler`, read-only).

        The Butler is created on first access and reused afterwards.

        Raises
        ------
        RuntimeError
            Raised if an `OSError` occurs while opening or querying `repo`.

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._workButler is not None:
            return self._workButler

        try:
            # Collection names are discovered from the registry rather than
            # hardcoded, since all Gen 3 collection names are subject to change.
            # Opened writeable because the work butler is derived from it.
            registryButler = dafButler.Butler(self.repo, writeable=True)
            registry = registryButler.registry

            collections = set(registry.queryCollections(
                collectionType=dafButler.CollectionType.RUN))
            for dataId in registry.queryDataIds('instrument'):
                camera = obsBase.Instrument.fromName(dataId["instrument"], registry)
                collections.add(camera.makeDefaultRawIngestRunName())

            # should set run=self.runName, but this breaks quantum graph generation (DM-26246)
            self._workButler = dafButler.Butler(butler=registryButler, collections=collections)
        except OSError as e:
            raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e

        return self._workButler

    @property
    def analysisButler(self):
        """A Butler that can read from a Gen 3 repository with outputs
        (`lsst.daf.butler.Butler`, read-only).

        The Butler is created on first access and reused afterwards.

        Raises
        ------
        RuntimeError
            Raised if an `OSError` occurs while opening `repo`.

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._analysisButler is not None:
            return self._analysisButler

        try:
            self._analysisButler = dafButler.Butler(
                self.repo,
                collections=self.runName,
                writeable=False,
            )
        except OSError as e:
            raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e

        return self._analysisButler