Coverage for python/lsst/ap/verify/dataset.py: 37%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# This file is part of ap_verify.
3#
4# Developed for the LSST Data Management System.
5# This product includes software developed by the LSST Project
6# (http://www.lsst.org).
7# See the COPYRIGHT file at the top-level directory of this distribution
8# for details of code ownership.
9#
10# This program is free software: you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation, either version 3 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program. If not, see <http://www.gnu.org/licenses/>.
22#
24__all__ = ["Dataset"]
26import os
28import lsst.daf.persistence as dafPersistence
29import lsst.daf.butler as dafButler
30import lsst.obs.base as obsBase
31from lsst.utils import getPackageDir
class Dataset:
    """A dataset supported by ``ap_verify``.

    Any object of this class is guaranteed to represent a ready-for-use
    ap_verify dataset, barring concurrent changes to the file system or EUPS
    operations. Constructing a Dataset does not create a compatible output
    repository(ies), which can be done by calling `makeCompatibleRepo`.

    Parameters
    ----------
    datasetId : `str`
        The name of the dataset package. A tag identifying the dataset is also
        accepted, but this usage is deprecated.

    Raises
    ------
    RuntimeError
        Raised if ``datasetId`` exists, but is not correctly organized or incomplete
    ValueError
        Raised if ``datasetId`` could not be loaded.
    """

    def __init__(self, datasetId):
        self._id = datasetId

        try:
            self._dataRootDir = getPackageDir(datasetId)
        except LookupError as e:
            error = f"Cannot find the {datasetId} package; is it set up?"
            raise ValueError(error) from e
        else:
            self._validatePackage()

        self._initPackage(datasetId)

    def _initPackage(self, name):
        """Prepare the package backing this ap_verify dataset.

        Parameters
        ----------
        name : `str`
            The EUPS package identifier for the desired package.
        """
        # No initialization required at present
        pass

    @property
    def datasetRoot(self):
        """The parent directory containing everything related to the
        ap_verify dataset (`str`, read-only).
        """
        return self._dataRootDir

    @property
    def rawLocation(self):
        """The directory containing the "raw" input data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'raw')

    @property
    def calibLocation(self):
        """The directory containing the calibration data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'calib')

    @property
    def refcatsLocation(self):
        """The directory containing external astrometric and photometric
        reference catalogs (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'refcats')

    @property
    def templateLocation(self):
        """The directory containing the image subtraction templates (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'templates')

    @property
    def configLocation(self):
        """The directory containing configs that can be used to process the data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'config')

    @property
    def pipelineLocation(self):
        """The directory containing pipelines that can be used to process the
        data in Gen 3 (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'pipelines')

    @property
    def obsPackage(self):
        """The name of the obs package associated with this data (`str`, read-only).
        """
        return dafPersistence.Butler.getMapperClass(self._stubInputRepo).getPackageName()

    @property
    def camera(self):
        """The name of the Gen 2 camera associated with this data (`str`, read-only).
        """
        return dafPersistence.Butler.getMapperClass(self._stubInputRepo).getCameraName()

    @property
    def instrument(self):
        """The Gen 3 instrument associated with this data (`lsst.obs.base.Instrument`, read-only).

        Raises
        ------
        RuntimeError
            Raised if the preloaded repository does not contain exactly one
            instrument.
        """
        butler = dafButler.Butler(self._preloadedRepo, writeable=False)
        instruments = list(butler.registry.queryDataIds('instrument'))
        if len(instruments) != 1:
            raise RuntimeError(f"Dataset does not have exactly one instrument; got {instruments}.")
        else:
            return obsBase.Instrument.fromName(instruments[0]["instrument"], butler.registry)

    @property
    def _stubInputRepo(self):
        """The directory containing the data set's input stub (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'repo')

    @property
    def _preloadedRepo(self):
        """The directory containing the pre-ingested Gen 3 repo (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'preloaded')

    @property
    def _preloadedExport(self):
        """The file containing an exported registry of `_preloadedRepo` (`str`, read-only).
        """
        return os.path.join(self.configLocation, 'export.yaml')

    def _validatePackage(self):
        """Confirm that the dataset directory satisfies all assumptions.

        Raises
        ------
        RuntimeError
            Raised if the package represented by this object does not conform to the
            dataset framework

        Notes
        -----
        Requires that `self._dataRootDir` has been initialized.
        """
        if not os.path.exists(self.datasetRoot):
            raise RuntimeError(f'Could not find dataset at {self.datasetRoot}')
        # BUG FIX: the original messages concatenated the path directly onto
        # "is missing ...", producing e.g. ".../datasetis missing data directory";
        # a separating space is now included in each message.
        if not os.path.exists(self.rawLocation):
            raise RuntimeError(f'Dataset at {self.datasetRoot} is missing data directory')
        if not os.path.exists(self.calibLocation):
            raise RuntimeError(f'Dataset at {self.datasetRoot} is missing calibration directory')
        # Template and refcat directories might not be subdirectories of self.datasetRoot
        if not os.path.exists(self.templateLocation):
            raise RuntimeError(f'Dataset is missing template directory at {self.templateLocation}')
        if not os.path.exists(self.refcatsLocation):
            raise RuntimeError(f'Dataset is missing reference catalog directory at {self.refcatsLocation}')
        if not os.path.exists(self._stubInputRepo):
            raise RuntimeError(f'Dataset at {self.datasetRoot} is missing stub repo')
        if not _isRepo(self._stubInputRepo):
            raise RuntimeError(f'Stub repo at {self._stubInputRepo} is missing mapper file')

    def __eq__(self, other):
        """Test that two Dataset objects are equal.

        Two objects are equal iff they refer to the same ap_verify dataset.
        """
        return self.datasetRoot == other.datasetRoot

    def __repr__(self):
        """A string representation that can be used to reconstruct the dataset.
        """
        return f"Dataset({self._id!r})"

    def makeCompatibleRepo(self, repoDir, calibRepoDir):
        """Set up a directory as a Gen 2 repository compatible with this ap_verify dataset.

        If the directory already exists, any files required by the dataset will
        be added if absent; otherwise the directory will remain unchanged.

        Parameters
        ----------
        repoDir : `str`
            The directory where the output repository will be created.
        calibRepoDir : `str`
            The directory where the output calibration repository will be created.
        """
        mapperArgs = {'mapperArgs': {'calibRoot': calibRepoDir}}
        if _isRepo(self.templateLocation):
            # Stub repo is not a parent because can't mix v1 and v2 repositories in parents list
            dafPersistence.Butler(inputs=[{"root": self.templateLocation, "mode": "r"}],
                                  outputs=[{"root": repoDir, "mode": "rw", **mapperArgs}])
        else:
            dafPersistence.Butler(inputs=[{"root": self._stubInputRepo, "mode": "r"}],
                                  outputs=[{"root": repoDir, "mode": "rw", **mapperArgs}])

    def makeCompatibleRepoGen3(self, repoDir):
        """Set up a directory as a Gen 3 repository compatible with this ap_verify dataset.

        If the repository already exists, this call has no effect.

        Parameters
        ----------
        repoDir : `str`
            The directory where the output repository will be created.
        """
        # No way to tell makeRepo "create only what's missing"
        try:
            seedConfig = dafButler.Config()
            # Checksums greatly slow importing of large repositories
            seedConfig["datastore", "checksum"] = False
            repoConfig = dafButler.Butler.makeRepo(repoDir, config=seedConfig)
            butler = dafButler.Butler(repoConfig, writeable=True)
            butler.import_(directory=self._preloadedRepo, filename=self._preloadedExport,
                           transfer="auto")
        except FileExistsError:
            pass
252def _isRepo(repoDir):
253 """Test whether a directory has been set up as a repository.
254 """
255 return os.path.exists(os.path.join(repoDir, '_mapper')) \
256 or os.path.exists(os.path.join(repoDir, 'repositoryCfg.yaml'))