Coverage for python/lsst/ap/verify/dataset.py: 38%


113 statements  

#
# This file is part of ap_verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

__all__ = ["Dataset"]

import os
import warnings

from deprecated.sphinx import deprecated

import lsst.daf.persistence as dafPersistence
import lsst.daf.butler as dafButler
import lsst.obs.base as obsBase
from lsst.utils import getPackageDir

from .config import Config


class Dataset:
40 """A dataset supported by ``ap_verify``. 

41 

42 Any object of this class is guaranteed to represent a ready-for-use 

43 ap_verify dataset, barring concurrent changes to the file system or EUPS 

44 operations. Constructing a Dataset does not create a compatible output 

45 repository(ies), which can be done by calling `makeCompatibleRepo`. 

46 

47 Parameters 

48 ---------- 

49 datasetId : `str` 

50 The name of the dataset package. A tag identifying the dataset is also 

51 accepted, but this usage is deprecated. 

52 

53 Raises 

54 ------ 

55 RuntimeError 

56 Raised if `datasetId` exists, but is not correctly organized or incomplete 

57 ValueError 

58 Raised if `datasetId` could not be loaded. 
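
    Examples
    --------
    A minimal sketch, assuming the ``ap_verify_ci_hits2015`` dataset package
    (an illustrative name) has been set up through EUPS:

    >>> dataset = Dataset("ap_verify_ci_hits2015")  # doctest: +SKIP
    >>> dataset.makeCompatibleRepoGen3("workspace/repo")  # doctest: +SKIP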

59 """ 


    def __init__(self, datasetId):
        self._id = datasetId
        # daf.persistence.Policy's behavior on missing keys is apparently
        # undefined; test for __getitem__ *either* raising KeyError or
        # returning None
        try:
            datasetPackage = self._getDatasetInfo()[datasetId]
            if datasetPackage is None:
                raise KeyError
            else:
                warnings.warn(f"The {datasetId} name is deprecated, and will be removed after v24.0. "
                              f"Use {datasetPackage} instead.", category=FutureWarning)
        except KeyError:
            # if datasetId is not a known tag, assume it's a package name
            datasetPackage = datasetId

        try:
            self._dataRootDir = getPackageDir(datasetPackage)
        except LookupError as e:
            error = f"Cannot find the {datasetPackage} package; is it set up?"
            raise ValueError(error) from e
        else:
            self._validatePackage()

        self._initPackage(datasetPackage)

    def _initPackage(self, name):
        """Prepare the package backing this ap_verify dataset.

        Parameters
        ----------
        name : `str`
            The EUPS package identifier for the desired package.
        """
        # No initialization required at present
        pass

    # TODO: remove in DM-29042
    @staticmethod
    @deprecated(reason="The concept of 'supported' datasets is deprecated. This "
                       "method will be removed after v24.0.", version="v22.0", category=FutureWarning)
    def getSupportedDatasets():
        """The ap_verify dataset IDs that can be passed to this class's constructor.

        Returns
        -------
        datasets : `set` of `str`
            The set of IDs that will be accepted.

        Raises
        ------
        IOError
            Raised if the config file does not exist or is not readable.
        RuntimeError
            Raised if the config file exists, but does not contain the
            expected data.
        """
        return Dataset._getDatasetInfo().keys()

    # TODO: remove in DM-29042
    @staticmethod
    def _getDatasetInfo():
        """Return external data on supported ap_verify datasets.

        If an exception is raised, the program state shall be unchanged.

        Returns
        -------
        datasetToPackage : `dict`-like
            A map from dataset IDs to package names.

        Raises
        ------
        RuntimeError
            Raised if the config file exists, but does not contain the
            expected data.
        """
        return Config.instance['datasets']
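
    # Illustrative sketch only (not the actual shipped config): the mapping
    # returned above is assumed to behave like a plain dict such as
    #     {"HiTS2015": "ap_verify_ci_hits2015"}
    # taking deprecated dataset tags to EUPS package names; __init__ relies
    # on lookups either raising KeyError or returning None for unknown tags.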

    @property
    def datasetRoot(self):
        """The parent directory containing everything related to the
        ap_verify dataset (`str`, read-only).
        """
        return self._dataRootDir

    @property
    def rawLocation(self):
        """The directory containing the "raw" input data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'raw')

    @property
    def calibLocation(self):
        """The directory containing the calibration data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'calib')

    @property
    def refcatsLocation(self):
        """The directory containing external astrometric and photometric
        reference catalogs (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'refcats')

    @property
    def templateLocation(self):
        """The directory containing the image subtraction templates (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'templates')

    @property
    def configLocation(self):
        """The directory containing configs that can be used to process the data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'config')

    @property
    def pipelineLocation(self):
        """The directory containing pipelines that can be used to process the
        data in Gen 3 (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'pipelines')

    @property
    def obsPackage(self):
        """The name of the obs package associated with this data (`str`, read-only).
        """
        return dafPersistence.Butler.getMapperClass(self._stubInputRepo).getPackageName()

    @property
    def camera(self):
        """The name of the Gen 2 camera associated with this data (`str`, read-only).
        """
        return dafPersistence.Butler.getMapperClass(self._stubInputRepo).getCameraName()

    @property
    def instrument(self):
        """The Gen 3 instrument associated with this data (`lsst.obs.base.Instrument`, read-only).
        """
        butler = dafButler.Butler(self._preloadedRepo, writeable=False)
        instruments = list(butler.registry.queryDataIds('instrument'))
        if len(instruments) != 1:
            raise RuntimeError(f"Dataset does not have exactly one instrument; got {instruments}.")
        else:
            return obsBase.Instrument.fromName(instruments[0]["instrument"], butler.registry)

    @property
    def _stubInputRepo(self):
        """The directory containing the data set's input stub (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'repo')

    @property
    def _preloadedRepo(self):
        """The directory containing the pre-ingested Gen 3 repo (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'preloaded')

    @property
    def _preloadedExport(self):
        """The file containing an exported registry of `_preloadedRepo` (`str`, read-only).
        """
        return os.path.join(self.configLocation, 'export.yaml')

    def _validatePackage(self):
        """Confirm that the dataset directory satisfies all assumptions.

        Raises
        ------
        RuntimeError
            Raised if the package represented by this object does not conform
            to the dataset framework.

        Notes
        -----
        Requires that `self._dataRootDir` has been initialized.
        """
        if not os.path.exists(self.datasetRoot):
            raise RuntimeError('Could not find dataset at ' + self.datasetRoot)
        if not os.path.exists(self.rawLocation):
            raise RuntimeError('Dataset at ' + self.datasetRoot + ' is missing data directory')
        if not os.path.exists(self.calibLocation):
            raise RuntimeError('Dataset at ' + self.datasetRoot + ' is missing calibration directory')
        # Template and refcat directories might not be subdirectories of self.datasetRoot
        if not os.path.exists(self.templateLocation):
            raise RuntimeError('Dataset is missing template directory at ' + self.templateLocation)
        if not os.path.exists(self.refcatsLocation):
            raise RuntimeError('Dataset is missing reference catalog directory at ' + self.refcatsLocation)
        if not os.path.exists(self._stubInputRepo):
            raise RuntimeError('Dataset at ' + self.datasetRoot + ' is missing stub repo')
        if not _isRepo(self._stubInputRepo):
            raise RuntimeError('Stub repo at ' + self._stubInputRepo + ' is missing mapper file')

    def __eq__(self, other):
        """Test that two Dataset objects are equal.

        Two objects are equal iff they refer to the same ap_verify dataset.
        """
        return self.datasetRoot == other.datasetRoot

    def __repr__(self):
        """A string representation that can be used to reconstruct the dataset.
        """
        return f"Dataset({self._id!r})"

    def makeCompatibleRepo(self, repoDir, calibRepoDir):
        """Set up a directory as a Gen 2 repository compatible with this ap_verify dataset.

        If the directory already exists, any files required by the dataset
        will be added if absent; otherwise the directory will remain
        unchanged.

        Parameters
        ----------
        repoDir : `str`
            The directory where the output repository will be created.
        calibRepoDir : `str`
            The directory where the output calibration repository will be created.
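
        Examples
        --------
        A minimal sketch; the workspace paths are illustrative assumptions:

        >>> dataset.makeCompatibleRepo("workspace/ingested",
        ...                            "workspace/calibingested")  # doctest: +SKIP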

276 """ 

        mapperArgs = {'mapperArgs': {'calibRoot': calibRepoDir}}
        if _isRepo(self.templateLocation):
            # Stub repo is not a parent because v1 and v2 repositories
            # can't be mixed in a parents list
            dafPersistence.Butler(inputs=[{"root": self.templateLocation, "mode": "r"}],
                                  outputs=[{"root": repoDir, "mode": "rw", **mapperArgs}])
        else:
            dafPersistence.Butler(inputs=[{"root": self._stubInputRepo, "mode": "r"}],
                                  outputs=[{"root": repoDir, "mode": "rw", **mapperArgs}])

    def makeCompatibleRepoGen3(self, repoDir):
        """Set up a directory as a Gen 3 repository compatible with this ap_verify dataset.

        If the repository already exists, this call has no effect.

        Parameters
        ----------
        repoDir : `str`
            The directory where the output repository will be created.
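
        Examples
        --------
        A minimal sketch; the workspace path is an illustrative assumption:

        >>> dataset.makeCompatibleRepoGen3("workspace/gen3repo")  # doctest: +SKIP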

295 """ 

        # No way to tell makeRepo "create only what's missing"
        try:
            seedConfig = dafButler.Config()
            # Checksums greatly slow importing of large repositories
            seedConfig["datastore", "checksum"] = False
            repoConfig = dafButler.Butler.makeRepo(repoDir, config=seedConfig)
            butler = dafButler.Butler(repoConfig, writeable=True)
            butler.import_(directory=self._preloadedRepo, filename=self._preloadedExport,
                           transfer="auto")
        except FileExistsError:
            pass


def _isRepo(repoDir):
    """Test whether a directory has been set up as a repository.
    """
    return os.path.exists(os.path.join(repoDir, '_mapper')) \
        or os.path.exists(os.path.join(repoDir, 'repositoryCfg.yaml'))