Coverage for python/lsst/ap/verify/workspace.py: 64%
97 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-08 04:28 -0700
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-08 04:28 -0700
1#
2# This file is part of ap_verify.
3#
4# Developed for the LSST Data Management System.
5# This product includes software developed by the LSST Project
6# (http://www.lsst.org).
7# See the COPYRIGHT file at the top-level directory of this distribution
8# for details of code ownership.
9#
10# This program is free software: you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation, either version 3 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program. If not, see <http://www.gnu.org/licenses/>.
22#
24__all__ = ["Workspace", "WorkspaceGen3"]
26import abc
27import os
28import pathlib
29import re
30import stat
32import lsst.daf.butler as dafButler
33import lsst.obs.base as obsBase
class Workspace(metaclass=abc.ABCMeta):
    """A directory used by ``ap_verify`` to handle data and outputs.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for various purposes. Subclasses are
    typically specialized for particular workflows. Keeping such details in
    separate classes makes it easier to provide guarantees without forcing
    awkward directory structures on users.

    All Workspace classes must guarantee the existence of any subdirectories
    inside the workspace. Directories corresponding to repositories do not need
    to be initialized, since creating a valid repository usually requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        # Properties must be `str` for backwards compatibility
        self._location = str(pathlib.Path(location).resolve())

        # Guarantee that the workspace root and its config directory exist.
        self.mkdir(self._location)
        self.mkdir(self.configDir)

    @staticmethod
    def mkdir(directory):
        """Create a directory for the workspace.

        This method is intended to be called only by subclasses, and should
        not be used by external code.

        Parameters
        ----------
        directory : `str`
            The directory to create. Missing parent directories are created
            as well, and an already existing directory is not an error.

        Notes
        -----
        The requested permissions are applied only to the final path
        component (and are still subject to the process umask); any missing
        parents are created with the default permissions.
        """
        mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH  # a+r, u+rwx
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True, mode=mode)

    def __eq__(self, other):
        """Test whether two workspaces are of the same type and have the
        same location.
        """
        # Exact type identity is intended: a subclass instance never equals
        # an instance of its parent class, even at the same location.
        return type(self) is type(other) and self.workDir == other.workDir

    def __hash__(self):
        """Hash on the same fields that `__eq__` compares.

        Defining ``__eq__`` alone would make instances unhashable; this keeps
        equal workspaces usable as `dict` keys and `set` members.
        """
        return hash((type(self), self.workDir))

    def __repr__(self):
        """A string representation that can be used to reconstruct the Workspace.
        """
        return f"{type(self).__name__}({self.workDir!r})"

    @property
    def workDir(self):
        """The absolute location of the workspace as a whole
        (`str`, read-only).
        """
        return self._location

    @property
    def configDir(self):
        """The absolute location of a directory containing custom Task config
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'config')

    @property
    @abc.abstractmethod
    def dbLocation(self):
        """The default absolute location of the source association database to
        be created or updated by the pipeline (`str`, read-only).

        Shall be a pathname to a database suitable for the backend of `Apdb`.
        """

    @property
    @abc.abstractmethod
    def dbConfigLocation(self):
        """The absolute location of the config file for the source association
        database to be created or updated by the pipeline (`str`, read-only).

        The location is assumed to be a Python (`lsst.pex.config.Config`) file.
        """

    @property
    @abc.abstractmethod
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """

    @property
    @abc.abstractmethod
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs (read-only).
        The type is class-dependent.
        """

    @property
    @abc.abstractmethod
    def analysisButler(self):
        """A Butler that can read pipeline outputs (read-only).
        The type is class-dependent.

        The Butler should be read-only, if its type supports the restriction.
        """
class WorkspaceGen3(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    subdirectories for a repository and for non-repository files. Constructing
    a WorkspaceGen3 does not *initialize* its repository, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.repo)
        self.mkdir(self.pipelineDir)

        # Gen 3 name of the output
        self.outputName = "ap_verify-output"

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def repo(self):
        """The absolute path/URI to a Butler repo for AP pipeline processing
        (`str`, read-only).
        """
        return os.path.join(self._location, 'repo')

    @property
    def pipelineDir(self):
        """The absolute location of a directory containing custom pipeline
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'pipelines')

    @property
    def dbLocation(self):
        """The default absolute location of the source association database
        (`str`, read-only): ``association.db`` inside the workspace.
        """
        return os.path.join(self._location, 'association.db')

    @property
    def dbConfigLocation(self):
        """The absolute location of the config file for the source
        association database (`str`, read-only): ``apdb.py`` inside the
        workspace.
        """
        return os.path.join(self._location, 'apdb.py')

    @property
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only): ``alerts`` inside the workspace.
        """
        return os.path.join(self._location, 'alerts')

    def _ensureCollection(self, registry, name, collectionType):
        """Add a collection to a repository if it does not already exist.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            The repository to which to add the collection.
        name : `str`
            The name of the collection to test for and add.
        collectionType : `lsst.daf.butler.CollectionType`
            The type of collection to add. This field is ignored when
            testing if a collection exists.
        """
        # The existence check goes through a regex query; escape the name so
        # that any regex metacharacters in it are matched literally and a
        # different, similarly named collection cannot satisfy the check.
        namePattern = re.compile(re.escape(name))
        matchingCollections = list(registry.queryCollections(namePattern))
        if not matchingCollections:
            registry.registerCollection(name, type=collectionType)

    @property
    def workButler(self):
        """A Butler that can read and write to a Gen 3 repository
        (`lsst.daf.butler.Butler`, read-only).

        Notes
        -----
        Assumes `repo` has been initialized.

        The Butler is created on first access and cached. Creating it also
        registers the raw ingest run of every instrument in the repository
        and, if it does not exist yet, the output chain named by
        ``outputName``.

        Raises
        ------
        RuntimeError
            Raised if an `OSError` occurs while setting up the Butler; the
            repository is then reported as not being a Gen 3 repository.
        """
        if self._workButler is None:
            try:
                # Dataset generation puts all preloaded datasets in <instrument>/defaults.
                # However, this definition excludes raws, which are not preloaded.
                queryButler = dafButler.Butler(self.repo, writeable=True)  # writeable for _workButler
                inputs = []
                for dimension in queryButler.registry.queryDataIds('instrument'):
                    instrument = obsBase.Instrument.fromName(dimension["instrument"], queryButler.registry)
                    defaultName = instrument.makeCollectionName("defaults")
                    inputs.append(defaultName)
                    rawName = instrument.makeDefaultRawIngestRunName()
                    inputs.append(rawName)
                    self._ensureCollection(queryButler.registry, rawName, dafButler.CollectionType.RUN)

                # Create an output chain here, so that workButler can see it.
                # Definition does not conflict with what pipetask --output uses.
                # Regex is workaround for DM-25945; the name is escaped so it
                # is matched literally.
                outputPattern = re.compile(re.escape(self.outputName))
                if not list(queryButler.registry.queryCollections(outputPattern)):
                    # The chain is defined only when it is first created; an
                    # existing chain is left as it is.
                    queryButler.registry.registerCollection(self.outputName,
                                                            dafButler.CollectionType.CHAINED)
                    queryButler.registry.setCollectionChain(self.outputName, inputs)

                self._workButler = dafButler.Butler(butler=queryButler, collections=self.outputName)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._workButler

    @property
    def analysisButler(self):
        """A Butler that can read from a Gen 3 repository with outputs
        (`lsst.daf.butler.Butler`, read-only).

        Notes
        -----
        Assumes `repo` has been initialized.

        The Butler is created non-writeable on first access and cached.

        Raises
        ------
        RuntimeError
            Raised if an `OSError` occurs while creating the Butler; the
            repository is then reported as not being a Gen 3 repository.
        """
        if self._analysisButler is None:
            try:
                self._analysisButler = dafButler.Butler(self.repo, collections=self.outputName,
                                                        writeable=False)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._analysisButler