Coverage for python/lsst/ap/verify/workspace.py: 47%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# This file is part of ap_verify.
3#
4# Developed for the LSST Data Management System.
5# This product includes software developed by the LSST Project
6# (http://www.lsst.org).
7# See the COPYRIGHT file at the top-level directory of this distribution
8# for details of code ownership.
9#
10# This program is free software: you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation, either version 3 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program. If not, see <http://www.gnu.org/licenses/>.
22#
# Public API of this module: only the workspace classes are exported.
__all__ = ["Workspace", "WorkspaceGen3"]
26import abc
27import os
28import pathlib
29import re
30import stat
32import lsst.skymap
33import lsst.daf.butler as dafButler
34import lsst.obs.base as obsBase
class Workspace(metaclass=abc.ABCMeta):
    """A directory used by ``ap_verify`` to handle data and outputs.

    Any object of this class represents a working directory containing
    (possibly empty) subdirectories for various purposes. Subclasses are
    typically specialized for particular workflows. Keeping such details in
    separate classes makes it easier to provide guarantees without forcing
    awkward directory structures on users.

    All Workspace classes must guarantee the existence of any subdirectories
    inside the workspace. Directories corresponding to repositories do not need
    to be initialized, since creating a valid repository usually requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """
    def __init__(self, location):
        # Properties must be `str` for backwards compatibility
        self._location = str(pathlib.Path(location).resolve())

        # The workspace root must exist before any of its subdirectories.
        self.mkdir(self._location)
        self.mkdir(self.configDir)

    @staticmethod
    def mkdir(directory):
        """Create a directory for the workspace.

        This method is intended to be called only by subclasses, and should
        not be used by external code.

        Missing parent directories are created as well, and it is not an
        error if ``directory`` already exists.

        Parameters
        ----------
        directory : `str`
            The directory to create.
        """
        mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH  # a+r, u+rwx
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True, mode=mode)

    def __eq__(self, other):
        """Test whether two workspaces are of the same type and have the
        same location.
        """
        # Exact type match (not isinstance): workspaces of different
        # subclasses are never equal, even at the same location.
        return type(self) is type(other) and self.workDir == other.workDir

    def __hash__(self):
        """Hash consistently with `__eq__`, using the type and location.
        """
        # Defining __eq__ alone would make instances unhashable; both keys
        # used here are exactly the ones __eq__ compares.
        return hash((type(self), self.workDir))

    def __repr__(self):
        """A string representation that can be used to reconstruct the Workspace.
        """
        return f"{type(self).__name__}({self.workDir!r})"

    @property
    def workDir(self):
        """The absolute location of the workspace as a whole
        (`str`, read-only).
        """
        return self._location

    @property
    def configDir(self):
        """The absolute location of a directory containing custom Task config
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'config')

    @property
    @abc.abstractmethod
    def dbLocation(self):
        """The default absolute location of the source association database to
        be created or updated by the pipeline (`str`, read-only).

        Shall be a pathname to a database suitable for the backend of `Apdb`.
        """

    @property
    @abc.abstractmethod
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """

    @property
    @abc.abstractmethod
    def workButler(self):
        """A Butler that can produce pipeline inputs and outputs (read-only).
        The type is class-dependent.
        """

    @property
    @abc.abstractmethod
    def analysisButler(self):
        """A Butler that can read pipeline outputs (read-only).
        The type is class-dependent.

        The Butler should be read-only, if its type supports the restriction.
        """
class WorkspaceGen3(Workspace):
    """A directory used by ``ap_verify`` to handle data.

    Any object of this class represents a working directory containing
    subdirectories for a repository and for non-repository files. Constructing
    a WorkspaceGen3 does not *initialize* its repository, as this requires
    external information.

    Parameters
    ----------
    location : `str`
        The location on disk where the workspace will be set up. Will be
        created if it does not already exist.

    Raises
    ------
    EnvironmentError
        Raised if ``location`` is not readable or not writeable
    """

    def __init__(self, location):
        super().__init__(location)

        self.mkdir(self.repo)
        self.mkdir(self.pipelineDir)

        # Gen 3 name of the output
        self.outputName = "ap_verify-output"

        # Lazy evaluation to optimize butlers
        self._workButler = None
        self._analysisButler = None

    @property
    def repo(self):
        """The absolute path/URI to a Butler repo for AP pipeline processing
        (`str`, read-only).
        """
        return os.path.join(self._location, 'repo')

    @property
    def pipelineDir(self):
        """The absolute location of a directory containing custom pipeline
        files for use with the data (`str`, read-only).
        """
        return os.path.join(self._location, 'pipelines')

    @property
    def dbLocation(self):
        """The default absolute location of the source association database
        (`str`, read-only).
        """
        return os.path.join(self._location, 'association.db')

    @property
    def alertLocation(self):
        """The absolute location of an output directory for persisted
        alert packets (`str`, read-only).
        """
        return os.path.join(self._location, 'alerts')

    def _ensureCollection(self, registry, name, collectionType):
        """Add a collection to a repository if it does not already exist.

        Parameters
        ----------
        registry : `lsst.daf.butler.Registry`
            The repository to which to add the collection.
        name : `str`
            The name of the collection to test for and add.
        collectionType : `lsst.daf.butler.CollectionType`
            The type of collection to add. This field is ignored when
            testing if a collection exists.
        """
        # The name is escaped so that any regex metacharacters it contains
        # are matched literally instead of being read as pattern syntax.
        matchingCollections = list(registry.queryCollections(re.compile(re.escape(name))))
        if not matchingCollections:
            registry.registerCollection(name, type=collectionType)

    @property
    def workButler(self):
        """A Butler that can read and write to a Gen 3 repository
        (`lsst.daf.butler.Butler`, read-only).

        The Butler is created on first access and cached afterwards.

        Notes
        -----
        Assumes `repo` has been initialized.

        Raises
        ------
        RuntimeError
            Raised if opening or configuring the repository fails with an
            `OSError`.
        """
        if self._workButler is None:
            try:
                # Hard-code the collection names because it's hard to infer the inputs from the Butler
                queryButler = dafButler.Butler(self.repo, writeable=True)  # writeable for _workButler
                inputs = {
                    lsst.skymap.BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME,
                }
                for dimension in queryButler.registry.queryDataIds('instrument'):
                    instrument = obsBase.Instrument.fromName(dimension["instrument"], queryButler.registry)
                    rawName = instrument.makeDefaultRawIngestRunName()
                    inputs.add(rawName)
                    self._ensureCollection(queryButler.registry, rawName, dafButler.CollectionType.RUN)
                    inputs.add(instrument.makeCalibrationCollectionName())
                    inputs.add(instrument.makeRefCatCollectionName())
                inputs.update(queryButler.registry.queryCollections(re.compile(r"templates/\w+")))

                # Create an output chain here, so that workButler can see it.
                # Definition does not conflict with what pipetask --output uses.
                # Regex is workaround for DM-25945.
                outputPattern = re.compile(re.escape(self.outputName))
                if not list(queryButler.registry.queryCollections(outputPattern)):
                    queryButler.registry.registerCollection(self.outputName,
                                                            dafButler.CollectionType.CHAINED)
                    # Sort so that the chain order is reproducible from run to
                    # run; iteration order of a set of strings is not.
                    queryButler.registry.setCollectionChain(self.outputName, sorted(inputs))

                self._workButler = dafButler.Butler(butler=queryButler, collections=self.outputName)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._workButler

    @property
    def analysisButler(self):
        """A Butler that can read from a Gen 3 repository with outputs
        (`lsst.daf.butler.Butler`, read-only).

        The Butler is created on first access and cached afterwards.

        Notes
        -----
        Assumes `repo` has been initialized.

        Raises
        ------
        RuntimeError
            Raised if opening the repository fails with an `OSError`.
        """
        if self._analysisButler is None:
            try:
                self._analysisButler = dafButler.Butler(self.repo, collections=self.outputName,
                                                        writeable=False)
            except OSError as e:
                raise RuntimeError(f"{self.repo} is not a Gen 3 repository") from e
        return self._analysisButler