Coverage for python/lsst/ap/verify/workspace.py : 34%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
#
# This file is part of ap_verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
24import abc
25import os
26import pathlib
27import stat
29import lsst.daf.persistence as dafPersist
30import lsst.daf.butler as dafButler
31import lsst.obs.base as obsBase
class Workspace(metaclass=abc.ABCMeta):
    """A directory used by ``ap_verify`` to handle data and outputs.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for various purposes. Subclasses are
    typically specialized for particular workflows. Keeping such details in
    separate classes makes it easier to provide guarantees without forcing
    awkward directory structures on users.

    All Workspace classes must guarantee the existence of any subdirectories
    inside the workspace. Directories corresponding to repositories do not need
    to be initialized, since creating a valid repository usually requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable.
    """

    def __init__(self, location):
        # Properties must be `str` for backwards compatibility
        self._location = str(pathlib.Path(location).resolve())

        # The workspace root and its config directory exist as soon as the
        # object is constructed.
        self.mkdir(self._location)
        self.mkdir(self.configDir)

    @staticmethod
    def mkdir(directory):
        """Create a directory for the workspace.

        This method is intended to be called only by subclasses, and should
        not be used by external code.

        Missing parent directories are created as well, and a directory that
        already exists is not treated as an error.

        Parameters
        ----------
        directory : `str`
            The directory to create.
        """
        mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH  # a+r, u+rwx
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True, mode=mode)

    @property
    def workDir(self):
        """The absolute location of the workspace as a whole
        (`str`, read-only).
        """
        return self._location

    @property
    def configDir(self):
        """The absolute location of a directory containing custom Task config
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'config')

    @property
    @abc.abstractmethod
    def dbLocation(self):
        """The default absolute location of the source association database to
        be created or updated by the pipeline (`str`, read-only).

        Shall be a pathname to a database suitable for the backend of `Apdb`.
        """

    @property
    @abc.abstractmethod
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs (read-only).
        The type is class-dependent.
        """

    @property
    @abc.abstractmethod
    def analysisButler(self):
        """A Butler that can read pipeline outputs (read-only).
        The type is class-dependent.

        The Butler should be read-only, if its type supports the restriction.
        """
class WorkspaceGen2(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for repositories. Constructing a
    WorkspaceGen2 does not *initialize* its repositories, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable.
    """

    def __init__(self, location):
        super().__init__(location)

        # Guarantee that every repository directory exists; the repositories
        # themselves are not initialized here.
        self.mkdir(self.dataRepo)
        self.mkdir(self.calibRepo)
        self.mkdir(self.templateRepo)
        self.mkdir(self.outputRepo)

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def dataRepo(self):
        """The absolute path/URI to a Butler repo for science data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'ingested')

    @property
    def calibRepo(self):
        """The absolute path/URI to a Butler repo for calibration data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'calibingested')

    @property
    def templateRepo(self):
        """The absolute path/URI to a Butler repo for precomputed templates
        (`str`, read-only).

        Currently the same location as `dataRepo`.
        """
        return self.dataRepo

    @property
    def outputRepo(self):
        """The absolute path/URI to a Butler repo for AP pipeline products
        (`str`, read-only).
        """
        return os.path.join(self._location, 'output')

    @property
    def dbLocation(self):
        """The default absolute location of the source association database
        (`str`, read-only).

        This is the file ``association.db`` directly inside the workspace.
        """
        return os.path.join(self._location, 'association.db')

    @property
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs
        (`lsst.daf.persistence.Butler`, read-only).

        The Butler is created on first access and then reused.
        """
        if self._workButler is None:
            self._workButler = self._makeButler()
        return self._workButler

    def _makeButler(self):
        """Create a butler for accessing the entire workspace.

        Returns
        -------
        butler : `lsst.daf.persistence.Butler`
            A butler accepting `dataRepo`, `calibRepo`, and `templateRepo` as
            inputs, and `outputRepo` as an output.

        Notes
        -----
        Assumes all `*Repo` properties have been initialized.
        """
        # common arguments for butler elements
        mapperArgs = {"calibRoot": os.path.abspath(self.calibRepo)}

        inputs = [{"root": self.dataRepo, "mapperArgs": mapperArgs}]
        outputs = [{"root": self.outputRepo, "mode": "rw", "mapperArgs": mapperArgs}]

        # Only list the template repo as a separate input when it is a
        # different location from the data repo.
        if not os.path.samefile(self.dataRepo, self.templateRepo):
            inputs.append({'root': self.templateRepo, 'mode': 'r', 'mapperArgs': mapperArgs})

        return dafPersist.Butler(inputs=inputs, outputs=outputs)

    @property
    def analysisButler(self):
        """A Butler that can read pipeline outputs (`lsst.daf.persistence.Butler`, read-only).

        The Butler is created on first access, with `outputRepo` as a
        read-only input, and then reused.
        """
        if self._analysisButler is None:
            self._analysisButler = dafPersist.Butler(inputs={"root": self.outputRepo, "mode": "r"})
        return self._analysisButler
class WorkspaceGen3(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    subdirectories for a repository and for non-repository files. Constructing
    a WorkspaceGen3 does not *initialize* its repository, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable.
    """

    def __init__(self, location):
        super().__init__(location)

        # Guarantee that the repository directory exists; the repository
        # itself is not initialized here.
        self.mkdir(self.repo)

        # Gen 3 name of the output run
        self.runName = "ap_verify-output"

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def repo(self):
        """The absolute path/URI to a Butler repo for AP pipeline processing
        (`str`, read-only).
        """
        return os.path.join(self._location, 'repo')

    @property
    def dbLocation(self):
        """The default absolute location of the source association database
        (`str`, read-only).

        This is the file ``association.db`` directly inside the workspace.
        """
        return os.path.join(self._location, 'association.db')

    @property
    def workButler(self):
        """A Butler that can read and write to a Gen 3 repository (`lsst.daf.butler.Butler`, read-only).

        The Butler is created on first access and then reused.

        Raises
        ------
        RuntimeError
            Raised if creating the Butler fails with an `OSError`.

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._workButler is None:
            try:
                # All Gen 3 collection names subject to change; don't hardcode them
                queryButler = dafButler.Butler(self.repo, writeable=True)  # writeable for _workButler
                inputs = set(queryButler.registry.queryCollections(
                    collectionType=dafButler.CollectionType.RUN))
                for dimension in queryButler.registry.queryDimensions('instrument'):
                    instrument = obsBase.Instrument.fromName(dimension["instrument"], queryButler.registry)
                    inputs.add(instrument.makeDefaultRawIngestRunName())

                self._workButler = dafButler.Butler(butler=queryButler, collections=inputs, run=self.runName)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._workButler

    @property
    def analysisButler(self):
        """A Butler that can read from a Gen 3 repository with outputs (`lsst.daf.butler.Butler`, read-only).

        The Butler is created on first access, restricted to the `runName`
        collection and opened non-writeable, and then reused.

        Raises
        ------
        RuntimeError
            Raised if creating the Butler fails with an `OSError`.

        Notes
        -----
        Assumes `repo` has been initialized.
        """
        if self._analysisButler is None:
            try:
                self._analysisButler = dafButler.Butler(self.repo, collections=self.runName,
                                                        writeable=False)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._analysisButler