Coverage for python/lsst/ap/verify/dataset.py: 50%
62 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-02 10:39 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-02 10:39 +0000
1#
2# This file is part of ap_verify.
3#
4# Developed for the LSST Data Management System.
5# This product includes software developed by the LSST Project
6# (http://www.lsst.org).
7# See the COPYRIGHT file at the top-level directory of this distribution
8# for details of code ownership.
9#
10# This program is free software: you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation, either version 3 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program. If not, see <http://www.gnu.org/licenses/>.
22#
24__all__ = ["Dataset"]
26import os
28import lsst.daf.butler as dafButler
29import lsst.obs.base as obsBase
30from lsst.utils import getPackageDir
class Dataset:
    """A dataset supported by ``ap_verify``.

    Any object of this class is guaranteed to represent a ready-for-use
    ap_verify dataset, barring concurrent changes to the file system or EUPS
    operations. Constructing a Dataset does not create a compatible output
    repository(ies), which can be done by calling `makeCompatibleRepoGen3`.

    Parameters
    ----------
    datasetId : `str`
        The name of the dataset package. A tag identifying the dataset is also
        accepted, but this usage is deprecated.

    Raises
    ------
    RuntimeError
        Raised if `datasetId` exists, but is not correctly organized or incomplete
    ValueError
        Raised if `datasetId` could not be loaded.
    """

    def __init__(self, datasetId):
        # Kept verbatim so that __repr__ can reproduce the constructor call.
        self._id = datasetId

        try:
            self._dataRootDir = getPackageDir(datasetId)
        except LookupError as e:
            error = f"Cannot find the {datasetId} package; is it set up?"
            raise ValueError(error) from e
        else:
            # Only validate once the root directory is known.
            self._validatePackage()

        self._initPackage(datasetId)

    def _initPackage(self, name):
        """Prepare the package backing this ap_verify dataset.

        Parameters
        ----------
        name : `str`
            The EUPS package identifier for the desired package.
        """
        # No initialization required at present
        pass

    @property
    def datasetRoot(self):
        """The parent directory containing everything related to the
        ap_verify dataset (`str`, read-only).
        """
        return self._dataRootDir

    @property
    def rawLocation(self):
        """The directory containing the "raw" input data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'raw')

    @property
    def configLocation(self):
        """The directory containing configs that can be used to process the data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'config')

    @property
    def pipelineLocation(self):
        """The directory containing pipelines that can be used to process the
        data in Gen 3 (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'pipelines')

    @property
    def instrument(self):
        """The Gen 3 instrument associated with this data (`lsst.obs.base.Instrument`, read-only).

        Each access opens the preloaded repository read-only and queries its
        registry for instruments.

        Raises
        ------
        RuntimeError
            Raised if the preloaded repository does not contain exactly
            one instrument.
        """
        butler = dafButler.Butler(self._preloadedRepo, writeable=False)
        instruments = list(butler.registry.queryDataIds('instrument'))
        if len(instruments) != 1:
            raise RuntimeError(f"Dataset does not have exactly one instrument; got {instruments}.")
        return obsBase.Instrument.fromName(instruments[0]["instrument"], butler.registry)

    @property
    def _preloadedRepo(self):
        """The directory containing the pre-ingested Gen 3 repo (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'preloaded')

    @property
    def _preloadedExport(self):
        """The file containing an exported registry of `_preloadedRepo` (`str`, read-only).
        """
        return os.path.join(self.configLocation, 'export.yaml')

    def _validatePackage(self):
        """Confirm that the dataset directory satisfies all assumptions.

        Raises
        ------
        RuntimeError
            Raised if the package represented by this object does not conform to the
            dataset framework

        Notes
        -----
        Requires that `self._dataRootDir` has been initialized.
        """
        if not os.path.exists(self.datasetRoot):
            raise RuntimeError('Could not find dataset at ' + self.datasetRoot)
        if not os.path.exists(self.rawLocation):
            # The leading space keeps the path from running into the message.
            raise RuntimeError('Dataset at ' + self.datasetRoot + ' is missing data directory')

    def __eq__(self, other):
        """Test that two Dataset objects are equal.

        Two objects are equal iff they refer to the same ap_verify dataset.
        Comparison with an object that is not a `Dataset` returns
        `NotImplemented`, so that ``==`` evaluates to `False` instead of
        raising `AttributeError`.
        """
        if not isinstance(other, Dataset):
            return NotImplemented
        return self.datasetRoot == other.datasetRoot

    def __repr__(self):
        """A string representation that can be used to reconstruct the dataset.
        """
        return f"Dataset({self._id!r})"

    def makeCompatibleRepoGen3(self, repoDir):
        """Set up a directory as a Gen 3 repository compatible with this ap_verify dataset.

        If the repository already exists, this call has no effect.

        Parameters
        ----------
        repoDir : `str`
            The directory where the output repository will be created.
        """
        # No way to tell makeRepo "create only what's missing"
        try:
            seedConfig = dafButler.Config()
            # Checksums greatly slow importing of large repositories
            seedConfig["datastore", "checksum"] = False
            repoConfig = dafButler.Butler.makeRepo(repoDir, config=seedConfig)
            butler = dafButler.Butler(repoConfig, writeable=True)
            butler.import_(directory=self._preloadedRepo, filename=self._preloadedExport,
                           transfer="auto")
        except FileExistsError:
            # A FileExistsError from any step above is taken to mean the
            # repository is already in place, so there is nothing to do.
            pass
def _isRepo(repoDir):
    """Report whether ``repoDir`` contains a repository marker file.

    A directory counts as a repository if either ``_mapper`` or
    ``repositoryCfg.yaml`` exists directly inside it.
    """
    markerNames = ('_mapper', 'repositoryCfg.yaml')
    return any(os.path.exists(os.path.join(repoDir, marker)) for marker in markerNames)