Coverage for python/lsst/ap/verify/workspace.py : 36%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
#
# This file is part of ap_verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

import abc
import os
import pathlib
import stat

import lsst.daf.persistence as dafPersist
import lsst.daf.butler as dafButler
import lsst.obs.base as obsBase


class Workspace(metaclass=abc.ABCMeta):
    """A directory used by ``ap_verify`` to handle data and outputs.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for various purposes. Subclasses are
    typically specialized for particular workflows. Keeping such details in
    separate classes makes it easier to provide guarantees without forcing
    awkward directory structures on users.

    All Workspace classes must guarantee the existence of any subdirectories
    inside the workspace. Directories corresponding to repositories do not need
    to be initialized, since creating a valid repository usually requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """
    def __init__(self, location):
        # Properties must be `str` for backwards compatibility
        self._location = str(pathlib.Path(location).resolve())

        # The root must exist before any subdirectory is created inside it;
        # ``configDir`` is derived from ``_location``, so it is created second.
        self.mkdir(self._location)
        self.mkdir(self.configDir)

    @staticmethod
    def mkdir(directory):
        """Create a directory for the workspace.

        This method is intended to be called only by subclasses, and should
        not be used by external code.

        Missing parent directories are created as well, and it is not an
        error for ``directory`` to already exist.

        Parameters
        ----------
        directory : `str`
            The directory to create.
        """
        mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH  # a+r, u+rwx
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True, mode=mode)

    @property
    def workDir(self):
        """The absolute location of the workspace as a whole
        (`str`, read-only).
        """
        return self._location

    @property
    def configDir(self):
        """The absolute location of a directory containing custom Task config
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'config')

    @property
    @abc.abstractmethod
    def dbLocation(self):
        """The default absolute location of the source association database to
        be created or updated by the pipeline (`str`, read-only).

        Shall be a pathname to a database suitable for the backend of `Apdb`.
        """

    @property
    @abc.abstractmethod
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """

    @property
    @abc.abstractmethod
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs (read-only).
        The type is class-dependent.
        """

    @property
    @abc.abstractmethod
    def analysisButler(self):
        """A Butler that can read pipeline outputs (read-only).
        The type is class-dependent.

        The Butler should be read-only, if its type supports the restriction.
        """
class WorkspaceGen2(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for repositories. Constructing a
    WorkspaceGen2 does not *initialize* its repositories, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.dataRepo)
        self.mkdir(self.calibRepo)
        self.mkdir(self.templateRepo)
        self.mkdir(self.outputRepo)
        # The alert directory is a workspace subdirectory too, so create it
        # alongside the repository directories to honor the guarantee that
        # all workspace subdirectories exist.
        self.mkdir(self.alertLocation)

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def dataRepo(self):
        """The absolute path/URI to a Butler repo for science data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'ingested')

    @property
    def calibRepo(self):
        """The absolute path/URI to a Butler repo for calibration data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'calibingested')

    @property
    def templateRepo(self):
        """The absolute path/URI to a Butler repo for precomputed templates
        (`str`, read-only).

        Currently the same location as `dataRepo`.
        """
        return self.dataRepo

    @property
    def outputRepo(self):
        """The absolute path/URI to a Butler repo for AP pipeline products
        (`str`, read-only).
        """
        return os.path.join(self._location, 'output')

    @property
    def dbLocation(self):
        """The default absolute location of the source association database
        (`str`, read-only).
        """
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """
        return os.path.join(self._location, 'alerts')

    @property
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs
        (`lsst.daf.persistence.Butler`, read-only).
        """
        # Built on first access and cached for later calls.
        if self._workButler is None:
            self._workButler = self._makeButler()
        return self._workButler

    def _makeButler(self):
        """Create a butler for accessing the entire workspace.

        Returns
        -------
        butler : `lsst.daf.persistence.Butler`
            A butler accepting `dataRepo`, `calibRepo`, and `templateRepo` as
            inputs, and `outputRepo` as an output.

        Notes
        -----
        Assumes all `*Repo` properties have been initialized.
        """
        # common arguments for butler elements
        mapperArgs = {"calibRoot": os.path.abspath(self.calibRepo)}

        inputs = [{"root": self.dataRepo, "mapperArgs": mapperArgs}]
        outputs = [{"root": self.outputRepo, "mode": "rw", "mapperArgs": mapperArgs}]

        # Only register the template repo separately when it is a different
        # directory from the data repo, so the same repo is not listed twice.
        if not os.path.samefile(self.dataRepo, self.templateRepo):
            inputs.append({'root': self.templateRepo, 'mode': 'r', 'mapperArgs': mapperArgs})

        return dafPersist.Butler(inputs=inputs, outputs=outputs)

    @property
    def analysisButler(self):
        """A Butler that can read pipeline outputs (`lsst.daf.persistence.Butler`, read-only).
        """
        # Built on first access and cached for later calls.
        if self._analysisButler is None:
            self._analysisButler = dafPersist.Butler(inputs={"root": self.outputRepo, "mode": "r"})
        return self._analysisButler
class WorkspaceGen3(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    subdirectories for a repository and for non-repository files. Constructing
    a WorkspaceGen3 does not *initialize* its repository, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.repo)
        # The alert directory is a workspace subdirectory too, so create it
        # alongside the repository directory to honor the guarantee that
        # all workspace subdirectories exist.
        self.mkdir(self.alertLocation)

        # Gen 3 name of the output run
        self.runName = "ap_verify-output"

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def repo(self):
        """The absolute path/URI to a Butler repo for AP pipeline processing
        (`str`, read-only).
        """
        return os.path.join(self._location, 'repo')

    @property
    def dbLocation(self):
        """The default absolute location of the source association database
        (`str`, read-only).
        """
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """
        return os.path.join(self._location, 'alerts')

    @property
    def workButler(self):
        """A Butler that can read and write to a Gen 3 repository (`lsst.daf.butler.Butler`, read-only).

        Raises
        ------
        RuntimeError
            Raised if an `OSError` occurs while constructing the Butler;
            the original error is chained as the cause.

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        # Built on first access and cached for later calls.
        if self._workButler is None:
            try:
                # All Gen 3 collection names subject to change; don't hardcode them
                queryButler = dafButler.Butler(self.repo, writeable=True)  # writeable for _workButler
                inputs = set(queryButler.registry.queryCollections(
                    collectionType=dafButler.CollectionType.RUN))
                for dimension in queryButler.registry.queryDataIds('instrument'):
                    instrument = obsBase.Instrument.fromName(dimension["instrument"], queryButler.registry)
                    inputs.add(instrument.makeDefaultRawIngestRunName())

                # should set run=self.runName, but this breaks quantum graph generation (DM-26246)
                self._workButler = dafButler.Butler(butler=queryButler, collections=inputs)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._workButler

    @property
    def analysisButler(self):
        """A Butler that can read from a Gen 3 repository with outputs (`lsst.daf.butler.Butler`, read-only).

        Raises
        ------
        RuntimeError
            Raised if an `OSError` occurs while constructing the Butler;
            the original error is chained as the cause.

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        # Built on first access and cached for later calls.
        if self._analysisButler is None:
            try:
                self._analysisButler = dafButler.Butler(self.repo, collections=self.runName,
                                                        writeable=False)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._analysisButler