#
# This file is part of ap_verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

__all__ = ["Dataset"]

import os
import warnings

from deprecated.sphinx import deprecated

import lsst.daf.persistence as dafPersistence
import lsst.daf.butler as dafButler
import lsst.obs.base as obsBase
from lsst.utils import getPackageDir

from .config import Config


class Dataset:
    """A dataset supported by ``ap_verify``.

    Any object of this class is guaranteed to represent a ready-for-use
    ap_verify dataset, barring concurrent changes to the file system or EUPS
    operations. Constructing a Dataset does not create compatible output
    repositories; these can be created by calling `makeCompatibleRepo` or
    `makeCompatibleRepoGen3`.

    Parameters
    ----------
    datasetId : `str`
        The name of the dataset package. A tag identifying the dataset is also
        accepted, but this usage is deprecated.

    Raises
    ------
    RuntimeError
        Raised if `datasetId` exists but is not correctly organized or is
        incomplete.
    ValueError
        Raised if `datasetId` could not be loaded.
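
    Examples
    --------
    A minimal usage sketch. ``ap_verify_ci_hits2015`` is an illustrative
    dataset package name (not defined in this module); any ap_verify dataset
    package that has been set up through EUPS works the same way:

    >>> dataset = Dataset("ap_verify_ci_hits2015")  # doctest: +SKIP
    >>> dataset.rawLocation  # doctest: +SKIP
    '/path/to/ap_verify_ci_hits2015/raw'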
59 """

    def __init__(self, datasetId):
        self._id = datasetId
        # daf.persistence.Policy's behavior on missing keys is apparently undefined
        # test for __getattr__ *either* raising KeyError or returning None
        try:
            datasetPackage = self._getDatasetInfo()[datasetId]
            if datasetPackage is None:
                raise KeyError
            else:
                warnings.warn(f"The {datasetId} name is deprecated, and will be removed after v24.0. "
                              f"Use {datasetPackage} instead.", category=FutureWarning)
        except KeyError:
            # if datasetId not known, assume it's a package name
            datasetPackage = datasetId

        try:
            self._dataRootDir = getPackageDir(datasetPackage)
        except LookupError as e:
            error = f"Cannot find the {datasetPackage} package; is it set up?"
            raise ValueError(error) from e
        else:
            self._validatePackage()

        self._initPackage(datasetPackage)

    def _initPackage(self, name):
        """Prepare the package backing this ap_verify dataset.

        Parameters
        ----------
        name : `str`
            The EUPS package identifier for the desired package.
        """
        # No initialization required at present
        pass

    # TODO: remove in DM-29042
    @staticmethod
    @deprecated(reason="The concept of 'supported' datasets is deprecated. This "
                "method will be removed after v24.0.", version="v22.0", category=FutureWarning)
    def getSupportedDatasets():
        """The ap_verify dataset IDs that can be passed to this class's constructor.

        Returns
        -------
        datasets : `set` of `str`
            The set of IDs that will be accepted.

        Raises
        ------
        IOError
            Raised if the config file does not exist or is not readable.
        RuntimeError
            Raised if the config file exists, but does not contain the
            expected data.
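
        Examples
        --------
        A minimal sketch; because this method is deprecated, calling it also
        emits a `FutureWarning`:

        >>> Dataset.getSupportedDatasets()  # doctest: +SKIP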
115 """
116 return Dataset._getDatasetInfo().keys()

    # TODO: remove in DM-29042
    @staticmethod
    def _getDatasetInfo():
        """Return external data on supported ap_verify datasets.

        If an exception is raised, the program state shall be unchanged.

        Returns
        -------
        datasetToPackage : `dict`-like
            A map from dataset IDs to package names.

        Raises
        ------
        RuntimeError
            Raised if the config file exists, but does not contain the
            expected data.
        """
        return Config.instance['datasets']

    @property
    def datasetRoot(self):
        """The parent directory containing everything related to the
        ap_verify dataset (`str`, read-only).
        """
        return self._dataRootDir

    @property
    def rawLocation(self):
        """The directory containing the "raw" input data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'raw')

    @property
    def calibLocation(self):
        """The directory containing the calibration data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'calib')

    @property
    def refcatsLocation(self):
        """The directory containing external astrometric and photometric
        reference catalogs (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'refcats')

    @property
    def templateLocation(self):
        """The directory containing the image subtraction templates (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'templates')

    @property
    def configLocation(self):
        """The directory containing configs that can be used to process the data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'config')

    @property
    def pipelineLocation(self):
        """The directory containing pipelines that can be used to process the
        data in Gen 3 (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'pipelines')

    @property
    def obsPackage(self):
        """The name of the obs package associated with this data (`str`, read-only).
        """
        return dafPersistence.Butler.getMapperClass(self._stubInputRepo).getPackageName()

    @property
    def camera(self):
        """The name of the Gen 2 camera associated with this data (`str`, read-only).
        """
        return dafPersistence.Butler.getMapperClass(self._stubInputRepo).getCameraName()

    @property
    def instrument(self):
        """The Gen 3 instrument associated with this data (`lsst.obs.base.Instrument`, read-only).
        """
        butler = dafButler.Butler(self._preloadedRepo, writeable=False)
        instruments = list(butler.registry.queryDataIds('instrument'))
        if len(instruments) != 1:
            raise RuntimeError(f"Dataset does not have exactly one instrument; got {instruments}.")
        else:
            return obsBase.Instrument.fromName(instruments[0]["instrument"], butler.registry)

    @property
    def _stubInputRepo(self):
        """The directory containing the data set's input stub (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'repo')

    @property
    def _preloadedRepo(self):
        """The directory containing the pre-ingested Gen 3 repo (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'preloaded')

    @property
    def _preloadedExport(self):
        """The file containing an exported registry of `_preloadedRepo` (`str`, read-only).
        """
        return os.path.join(self.configLocation, 'export.yaml')

    def _validatePackage(self):
        """Confirm that the dataset directory satisfies all assumptions.

        Raises
        ------
        RuntimeError
            Raised if the package represented by this object does not conform
            to the dataset framework.

        Notes
        -----
        Requires that `self._dataRootDir` has been initialized.
        """
        if not os.path.exists(self.datasetRoot):
            raise RuntimeError('Could not find dataset at ' + self.datasetRoot)
        if not os.path.exists(self.rawLocation):
            raise RuntimeError('Dataset at ' + self.datasetRoot + ' is missing data directory')
        if not os.path.exists(self.calibLocation):
            raise RuntimeError('Dataset at ' + self.datasetRoot + ' is missing calibration directory')
        # Template and refcat directories might not be subdirectories of self.datasetRoot
        if not os.path.exists(self.templateLocation):
            raise RuntimeError('Dataset is missing template directory at ' + self.templateLocation)
        if not os.path.exists(self.refcatsLocation):
            raise RuntimeError('Dataset is missing reference catalog directory at ' + self.refcatsLocation)
        if not os.path.exists(self._stubInputRepo):
            raise RuntimeError('Dataset at ' + self.datasetRoot + ' is missing stub repo')
        if not _isRepo(self._stubInputRepo):
            raise RuntimeError('Stub repo at ' + self._stubInputRepo + ' is missing mapper file')

    def __eq__(self, other):
        """Test that two Dataset objects are equal.

        Two objects are equal iff they refer to the same ap_verify dataset.
        """
        return self.datasetRoot == other.datasetRoot

    def __repr__(self):
        """A string representation that can be used to reconstruct the dataset.
        """
        return f"Dataset({self._id!r})"

    def makeCompatibleRepo(self, repoDir, calibRepoDir):
        """Set up a directory as a Gen 2 repository compatible with this ap_verify dataset.

        If the directory already exists, any files required by the dataset
        will be added if absent; otherwise the directory will remain
        unchanged.

        Parameters
        ----------
        repoDir : `str`
            The directory where the output repository will be created.
        calibRepoDir : `str`
            The directory where the output calibration repository will be created.
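
        Examples
        --------
        A minimal sketch of typical use; the dataset name and workspace paths
        are illustrative, not defined by this module:

        >>> dataset = Dataset("ap_verify_ci_hits2015")  # doctest: +SKIP
        >>> dataset.makeCompatibleRepo("workspace/ingested",
        ...                            "workspace/calibingested")  # doctest: +SKIP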
276 """
        mapperArgs = {'mapperArgs': {'calibRoot': calibRepoDir}}
        if _isRepo(self.templateLocation):
            # Stub repo is not a parent because we can't mix v1 and v2
            # repositories in the parents list.
            dafPersistence.Butler(inputs=[{"root": self.templateLocation, "mode": "r"}],
                                  outputs=[{"root": repoDir, "mode": "rw", **mapperArgs}])
        else:
            dafPersistence.Butler(inputs=[{"root": self._stubInputRepo, "mode": "r"}],
                                  outputs=[{"root": repoDir, "mode": "rw", **mapperArgs}])

    def makeCompatibleRepoGen3(self, repoDir):
        """Set up a directory as a Gen 3 repository compatible with this ap_verify dataset.

        If the repository already exists, this call has no effect.

        Parameters
        ----------
        repoDir : `str`
            The directory where the output repository will be created.
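
        Examples
        --------
        A minimal sketch of typical use; the dataset name and workspace path
        are illustrative, not defined by this module:

        >>> dataset = Dataset("ap_verify_ci_hits2015")  # doctest: +SKIP
        >>> dataset.makeCompatibleRepoGen3("workspace/repo")  # doctest: +SKIP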
295 """
        # No way to tell makeRepo "create only what's missing"
        try:
            seedConfig = dafButler.Config()
            # Checksums greatly slow importing of large repositories
            seedConfig["datastore", "checksum"] = False
            repoConfig = dafButler.Butler.makeRepo(repoDir, config=seedConfig)
            butler = dafButler.Butler(repoConfig, writeable=True)
            butler.import_(directory=self._preloadedRepo, filename=self._preloadedExport,
                           transfer="auto")
        except FileExistsError:
            pass


def _isRepo(repoDir):
    """Test whether a directory has been set up as a Gen 2 repository.
    """
    return os.path.exists(os.path.join(repoDir, '_mapper')) \
        or os.path.exists(os.path.join(repoDir, 'repositoryCfg.yaml'))