Coverage for python/lsst/ap/verify/dataset.py: 36%
110 statements
« prev ^ index » next coverage.py v7.2.1, created at 2023-03-12 03:57 -0700
1#
2# This file is part of ap_verify.
3#
4# Developed for the LSST Data Management System.
5# This product includes software developed by the LSST Project
6# (http://www.lsst.org).
7# See the COPYRIGHT file at the top-level directory of this distribution
8# for details of code ownership.
9#
10# This program is free software: you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation, either version 3 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program. If not, see <http://www.gnu.org/licenses/>.
22#
24__all__ = ["Dataset"]
26import os
27import warnings
29from deprecated.sphinx import deprecated
31import lsst.daf.persistence as dafPersistence
32import lsst.daf.butler as dafButler
33import lsst.obs.base as obsBase
34from lsst.utils import getPackageDir
36from .config import Config
class Dataset:
    """A dataset supported by ``ap_verify``.

    Any object of this class is guaranteed to represent a ready-for-use
    ap_verify dataset, barring concurrent changes to the file system or EUPS
    operations. Constructing a Dataset does not create a compatible output
    repository(ies), which can be done by calling `makeCompatibleRepo`.

    Parameters
    ----------
    datasetId : `str`
        The name of the dataset package. A tag identifying the dataset is also
        accepted, but this usage is deprecated.

    Raises
    ------
    RuntimeError
        Raised if ``datasetId`` exists, but is not correctly organized or is
        incomplete.
    ValueError
        Raised if ``datasetId`` could not be loaded.
    """

    def __init__(self, datasetId):
        self._id = datasetId
        # daf.persistence.Policy's behavior on missing keys is apparently undefined
        # test for __getattr__ *either* raising KeyError or returning None
        try:
            datasetPackage = self._getDatasetInfo()[datasetId]
            if datasetPackage is None:
                raise KeyError
            else:
                # Tag-style dataset IDs still work, but only with a deprecation warning.
                warnings.warn(f"The {datasetId} name is deprecated, and will be removed after v24.0. "
                              f"Use {datasetPackage} instead.", category=FutureWarning)
        except KeyError:
            # if datasetId not known, assume it's a package name
            datasetPackage = datasetId

        try:
            self._dataRootDir = getPackageDir(datasetPackage)
        except LookupError as e:
            error = f"Cannot find the {datasetPackage} package; is it set up?"
            raise ValueError(error) from e
        else:
            self._validatePackage()

        self._initPackage(datasetPackage)

    def _initPackage(self, name):
        """Prepare the package backing this ap_verify dataset.

        Parameters
        ----------
        name : `str`
            The EUPS package identifier for the desired package.
        """
        # No initialization required at present
        pass

    # TODO: remove in DM-29042
    @staticmethod
    @deprecated(reason="The concept of 'supported' datasets is deprecated. This "
                "method will be removed after v24.0.", version="v22.0", category=FutureWarning)
    def getSupportedDatasets():
        """The ap_verify dataset IDs that can be passed to this class's constructor.

        Returns
        -------
        datasets : `set` of `str`
            the set of IDs that will be accepted

        Raises
        ------
        IOError
            Raised if the config file does not exist or is not readable
        RuntimeError
            Raised if the config file exists, but does not contain the expected data
        """
        return Dataset._getDatasetInfo().keys()

    # TODO: remove in DM-29042
    @staticmethod
    def _getDatasetInfo():
        """Return external data on supported ap_verify datasets.

        If an exception is raised, the program state shall be unchanged.

        Returns
        -------
        datasetToPackage : `dict`-like
            a map from dataset IDs to package names.

        Raises
        ------
        RuntimeError
            Raised if the config file exists, but does not contain the expected data
        """
        return Config.instance['datasets']

    @property
    def datasetRoot(self):
        """The parent directory containing everything related to the
        ap_verify dataset (`str`, read-only).
        """
        return self._dataRootDir

    @property
    def rawLocation(self):
        """The directory containing the "raw" input data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'raw')

    @property
    def calibLocation(self):
        """The directory containing the calibration data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'calib')

    @property
    def refcatsLocation(self):
        """The directory containing external astrometric and photometric
        reference catalogs (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'refcats')

    @property
    def templateLocation(self):
        """The directory containing the image subtraction templates (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'templates')

    @property
    def configLocation(self):
        """The directory containing configs that can be used to process the data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'config')

    @property
    def obsPackage(self):
        """The name of the obs package associated with this data (`str`, read-only).
        """
        return dafPersistence.Butler.getMapperClass(self._stubInputRepo).getPackageName()

    @property
    def camera(self):
        """The name of the Gen 2 camera associated with this data (`str`, read-only).
        """
        return dafPersistence.Butler.getMapperClass(self._stubInputRepo).getCameraName()

    @property
    def instrument(self):
        """The Gen 3 instrument associated with this data (`lsst.obs.base.Instrument`, read-only).

        Raises
        ------
        RuntimeError
            Raised if the preloaded repository does not contain exactly one
            instrument.
        """
        butler = dafButler.Butler(self._preloadedRepo, writeable=False)
        instruments = list(butler.registry.queryDataIds('instrument'))
        if len(instruments) != 1:
            raise RuntimeError(f"Dataset does not have exactly one instrument; got {instruments}.")
        else:
            return obsBase.Instrument.fromName(instruments[0]["instrument"], butler.registry)

    @property
    def _stubInputRepo(self):
        """The directory containing the data set's input stub (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'repo')

    @property
    def _preloadedRepo(self):
        """The directory containing the pre-ingested Gen 3 repo (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'preloaded')

    @property
    def _preloadedExport(self):
        """The file containing an exported registry of `_preloadedRepo` (`str`, read-only).
        """
        return os.path.join(self.configLocation, 'export.yaml')

    def _validatePackage(self):
        """Confirm that the dataset directory satisfies all assumptions.

        Raises
        ------
        RuntimeError
            Raised if the package represented by this object does not conform
            to the dataset framework

        Notes
        -----
        Requires that `self._dataRootDir` has been initialized.
        """
        if not os.path.exists(self.datasetRoot):
            raise RuntimeError('Could not find dataset at ' + self.datasetRoot)
        # Note the leading space in each 'is missing' fragment; without it the
        # path and the message run together in the exception text.
        if not os.path.exists(self.rawLocation):
            raise RuntimeError('Dataset at ' + self.datasetRoot + ' is missing data directory')
        if not os.path.exists(self.calibLocation):
            raise RuntimeError('Dataset at ' + self.datasetRoot + ' is missing calibration directory')
        # Template and refcat directories might not be subdirectories of self.datasetRoot
        if not os.path.exists(self.templateLocation):
            raise RuntimeError('Dataset is missing template directory at ' + self.templateLocation)
        if not os.path.exists(self.refcatsLocation):
            raise RuntimeError('Dataset is missing reference catalog directory at ' + self.refcatsLocation)
        if not os.path.exists(self._stubInputRepo):
            raise RuntimeError('Dataset at ' + self.datasetRoot + ' is missing stub repo')
        if not _isRepo(self._stubInputRepo):
            raise RuntimeError('Stub repo at ' + self._stubInputRepo + ' is missing mapper file')

    def __eq__(self, other):
        """Test that two Dataset objects are equal.

        Two objects are equal iff they refer to the same ap_verify dataset.
        """
        if not isinstance(other, Dataset):
            # Defer to the other operand instead of raising AttributeError.
            return NotImplemented
        return self.datasetRoot == other.datasetRoot

    def __repr__(self):
        """A string representation that can be used to reconstruct the dataset.
        """
        return f"Dataset({self._id!r})"

    def makeCompatibleRepo(self, repoDir, calibRepoDir):
        """Set up a directory as a Gen 2 repository compatible with this ap_verify dataset.

        If the directory already exists, any files required by the dataset will
        be added if absent; otherwise the directory will remain unchanged.

        Parameters
        ----------
        repoDir : `str`
            The directory where the output repository will be created.
        calibRepoDir : `str`
            The directory where the output calibration repository will be created.
        """
        mapperArgs = {'mapperArgs': {'calibRoot': calibRepoDir}}
        if _isRepo(self.templateLocation):
            # Stub repo is not a parent because can't mix v1 and v2 repositories in parents list
            dafPersistence.Butler(inputs=[{"root": self.templateLocation, "mode": "r"}],
                                  outputs=[{"root": repoDir, "mode": "rw", **mapperArgs}])
        else:
            dafPersistence.Butler(inputs=[{"root": self._stubInputRepo, "mode": "r"}],
                                  outputs=[{"root": repoDir, "mode": "rw", **mapperArgs}])

    def makeCompatibleRepoGen3(self, repoDir):
        """Set up a directory as a Gen 3 repository compatible with this ap_verify dataset.

        If the repository already exists, this call has no effect.

        Parameters
        ----------
        repoDir : `str`
            The directory where the output repository will be created.
        """
        # No way to tell makeRepo "create only what's missing"
        try:
            seedConfig = dafButler.Config()
            # Checksums greatly slow importing of large repositories
            seedConfig["datastore", "checksum"] = False
            repoConfig = dafButler.Butler.makeRepo(repoDir, config=seedConfig)
            butler = dafButler.Butler(repoConfig, writeable=True)
            butler.import_(directory=self._preloadedRepo, filename=self._preloadedExport,
                           transfer="auto")
        except FileExistsError:
            # Repo already exists; leave it untouched.
            pass
302def _isRepo(repoDir):
303 """Test whether a directory has been set up as a repository.
304 """
305 return os.path.exists(os.path.join(repoDir, '_mapper')) \
306 or os.path.exists(os.path.join(repoDir, 'repositoryCfg.yaml'))