Coverage for python/lsst/ap/verify/workspace.py : 35%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# This file is part of ap_verify.
3#
4# Developed for the LSST Data Management System.
5# This product includes software developed by the LSST Project
6# (http://www.lsst.org).
7# See the COPYRIGHT file at the top-level directory of this distribution
8# for details of code ownership.
9#
10# This program is free software: you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation, either version 3 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program. If not, see <http://www.gnu.org/licenses/>.
22#
24import abc
25import os
26import pathlib
27import stat
29import lsst.daf.persistence as dafPersist
30import lsst.daf.butler as dafButler
class Workspace(metaclass=abc.ABCMeta):
    """A directory used by ``ap_verify`` to handle data and outputs.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for various purposes. Subclasses are
    typically specialized for particular workflows. Keeping such details in
    separate classes makes it easier to provide guarantees without forcing
    awkward directory structures on users.

    All Workspace classes must guarantee the existence of any subdirectories
    inside the workspace. Directories corresponding to repositories do not need
    to be initialized, since creating a valid repository usually requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        # Properties must be `str` for backwards compatibility
        self._location = str(pathlib.Path(location).resolve())

        # The workspace root must exist before any of its subdirectories.
        self.mkdir(self._location)
        self.mkdir(self.configDir)

    @staticmethod
    def mkdir(directory):
        """Create a directory for the workspace.

        This method is intended to be called only by subclasses, and should
        not be used by external code.

        Calling it on a directory that already exists is not an error.

        Parameters
        ----------
        directory : `str`
            The directory to create. Missing parent directories are created
            as well.
        """
        # a+r, u+rwx. Per the pathlib documentation, ``mode`` is combined with
        # the process umask and is only applied to the final path component;
        # parents created on the way get the default permissions.
        mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True, mode=mode)

    @property
    def workDir(self):
        """The absolute location of the workspace as a whole
        (`str`, read-only).
        """
        return self._location

    @property
    def configDir(self):
        """The absolute location of a directory containing custom Task config
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'config')

    @property
    @abc.abstractmethod
    def dbLocation(self):
        """The default absolute location of the source association database to
        be created or updated by the pipeline (`str`, read-only).

        Shall be a pathname to a database suitable for the backend of `Apdb`.
        """

    @property
    @abc.abstractmethod
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs (read-only).
        The type is class-dependent.
        """

    @property
    @abc.abstractmethod
    def analysisButler(self):
        """A Butler that can read pipeline outputs (read-only).
        The type is class-dependent.

        The Butler should be read-only, if its type supports the restriction.
        """
class WorkspaceGen2(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for repositories. Constructing a
    WorkspaceGen2 does not *initialize* its repositories, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        # Guarantee that every repository directory exists; none of them is
        # initialized as a Butler repository here.
        self.mkdir(self.dataRepo)
        self.mkdir(self.calibRepo)
        self.mkdir(self.templateRepo)
        self.mkdir(self.outputRepo)

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def dataRepo(self):
        """The absolute path/URI to a Butler repo for science data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'ingested')

    @property
    def calibRepo(self):
        """The absolute path/URI to a Butler repo for calibration data
        (`str`, read-only).
        """
        return os.path.join(self._location, 'calibingested')

    @property
    def templateRepo(self):
        """The absolute path/URI to a Butler repo for precomputed templates
        (`str`, read-only).

        This implementation returns the same location as `dataRepo`.
        """
        return self.dataRepo

    @property
    def outputRepo(self):
        """The absolute path/URI to a Butler repo for AP pipeline products
        (`str`, read-only).
        """
        return os.path.join(self._location, 'output')

    @property
    def dbLocation(self):
        """The default absolute location of the source association database
        (`str`, read-only).
        """
        return os.path.join(self._location, 'association.db')

    @property
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs
        (`lsst.daf.persistence.Butler`, read-only).

        The Butler is created on first access and reused afterwards.
        """
        if self._workButler is None:
            self._workButler = self._makeButler()
        return self._workButler

    def _makeButler(self):
        """Create a butler for accessing the entire workspace.

        Returns
        -------
        butler : `lsst.daf.persistence.Butler`
            A butler accepting `dataRepo`, `calibRepo`, and `templateRepo` as
            inputs, and `outputRepo` as an output.

        Notes
        -----
        Assumes all `*Repo` properties have been initialized.
        """
        # common arguments for butler elements
        mapperArgs = {"calibRoot": os.path.abspath(self.calibRepo)}

        inputs = [{"root": self.dataRepo, "mapperArgs": mapperArgs}]
        outputs = [{"root": self.outputRepo, "mode": "rw", "mapperArgs": mapperArgs}]

        # Only register the template repo separately when it is a different
        # directory from the data repo, to avoid listing one repo twice.
        if not os.path.samefile(self.dataRepo, self.templateRepo):
            inputs.append({"root": self.templateRepo, "mode": "r", "mapperArgs": mapperArgs})

        return dafPersist.Butler(inputs=inputs, outputs=outputs)

    @property
    def analysisButler(self):
        """A Butler that can read pipeline outputs
        (`lsst.daf.persistence.Butler`, read-only).

        The Butler is created on first access and reused afterwards; it opens
        `outputRepo` in read mode.
        """
        if self._analysisButler is None:
            self._analysisButler = dafPersist.Butler(inputs={"root": self.outputRepo, "mode": "r"})
        return self._analysisButler
class WorkspaceGen3(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    subdirectories for a repository and for non-repository files. Constructing
    a WorkspaceGen3 does not *initialize* its repository, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.repo)

        # Gen 3 name of the output run
        self.runName = "ap_verify-output"

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def repo(self):
        """The absolute path/URI to a Butler repo for AP pipeline processing
        (`str`, read-only).
        """
        return os.path.join(self._location, 'repo')

    @property
    def dbLocation(self):
        """The default absolute location of the source association database
        (`str`, read-only).
        """
        return os.path.join(self._location, 'association.db')

    def _openButler(self, **kwargs):
        """Construct a Gen 3 Butler on `repo`.

        Parameters
        ----------
        **kwargs
            Keyword arguments forwarded unchanged to the
            `lsst.daf.butler.Butler` constructor.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A new Butler pointing at `repo`.

        Raises
        ------
        RuntimeError
            Raised if constructing the Butler fails with an `OSError`; this
            is reported as `repo` not being a Gen 3 repository.
        """
        try:
            return dafButler.Butler(self.repo, **kwargs)
        except OSError as e:
            raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e

    @property
    def workButler(self):
        """A Butler that can read and write to a Gen 3 repository
        (`lsst.daf.butler.Butler`, read-only).

        Notes
        -----
        Assumes `repo` has been initialized. The Butler is created on first
        access, using `runName` as its run, and reused afterwards.
        """
        if self._workButler is None:
            self._workButler = self._openButler(run=self.runName)
        return self._workButler

    @property
    def analysisButler(self):
        """A Butler that can read from a Gen 3 repository with outputs
        (`lsst.daf.butler.Butler`, read-only).

        Notes
        -----
        Assumes `repo` has been initialized. The Butler is created on first
        access, non-writeable and restricted to the `runName` collection, and
        reused afterwards.
        """
        if self._analysisButler is None:
            self._analysisButler = self._openButler(collections=self.runName,
                                                    writeable=False)
        return self._analysisButler