Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# 

2# This file is part of ap_verify. 

3# 

4# Developed for the LSST Data Management System. 

5# This product includes software developed by the LSST Project 

6# (http://www.lsst.org). 

7# See the COPYRIGHT file at the top-level directory of this distribution 

8# for details of code ownership. 

9# 

10# This program is free software: you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation, either version 3 of the License, or 

13# (at your option) any later version. 

14# 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19# 

20# You should have received a copy of the GNU General Public License 

21# along with this program. If not, see <http://www.gnu.org/licenses/>. 

22# 

23 

24import os 

25import warnings 

26 

27from deprecated.sphinx import deprecated 

28 

29import lsst.daf.persistence as dafPersistence 

30import lsst.daf.butler as dafButler 

31import lsst.obs.base as obsBase 

32import lsst.pex.exceptions as pexExcept 

33from lsst.utils import getPackageDir 

34 

35from .config import Config 

36 

37 

class Dataset:
    """A dataset supported by ``ap_verify``.

    Any object of this class is guaranteed to represent a ready-for-use
    ap_verify dataset, barring concurrent changes to the file system or EUPS
    operations. Constructing a Dataset does not create compatible output
    repositories; that can be done by calling `makeCompatibleRepo`.

    Parameters
    ----------
    datasetId : `str`
        The name of the dataset package. A tag identifying the dataset is also
        accepted, but this usage is deprecated.

    Raises
    ------
    RuntimeError
        Raised if ``datasetId`` exists, but is not correctly organized or is
        incomplete.
    ValueError
        Raised if ``datasetId`` could not be loaded.
    """

    def __init__(self, datasetId):
        self._id = datasetId
        # daf.persistence.Policy's behavior on missing keys is apparently undefined;
        # test for __getattr__ *either* raising KeyError or returning None.
        try:
            datasetPackage = self._getDatasetInfo()[datasetId]
            if datasetPackage is None:
                raise KeyError
            else:
                warnings.warn(f"The {datasetId} name is deprecated, and will be removed after v24.0. "
                              f"Use {datasetPackage} instead.", category=FutureWarning)
        except KeyError:
            # if datasetId not known, assume it's a package name
            datasetPackage = datasetId

        try:
            self._dataRootDir = getPackageDir(datasetPackage)
        except pexExcept.NotFoundError as e:
            error = f"Cannot find the {datasetPackage} package; is it set up?"
            raise ValueError(error) from e
        else:
            self._validatePackage()

        self._initPackage(datasetPackage)

    def _initPackage(self, name):
        """Prepare the package backing this ap_verify dataset.

        Parameters
        ----------
        name : `str`
            The EUPS package identifier for the desired package.
        """
        # No initialization required at present
        pass

    # TODO: remove in DM-29042
    @staticmethod
    @deprecated(reason="The concept of 'supported' datasets is deprecated. This "
                "method will be removed after v24.0.", version="v22.0", category=FutureWarning)
    def getSupportedDatasets():
        """The ap_verify dataset IDs that can be passed to this class's constructor.

        Returns
        -------
        datasets : `set` of `str`
            the set of IDs that will be accepted

        Raises
        ------
        IOError
            Raised if the config file does not exist or is not readable
        RuntimeError
            Raised if the config file exists, but does not contain the expected data
        """
        return Dataset._getDatasetInfo().keys()

    # TODO: remove in DM-29042
    @staticmethod
    def _getDatasetInfo():
        """Return external data on supported ap_verify datasets.

        If an exception is raised, the program state shall be unchanged.

        Returns
        -------
        datasetToPackage : `dict`-like
            a map from dataset IDs to package names.

        Raises
        ------
        RuntimeError
            Raised if the config file exists, but does not contain the expected data
        """
        return Config.instance['datasets']

    @property
    def datasetRoot(self):
        """The parent directory containing everything related to the
        ap_verify dataset (`str`, read-only).
        """
        return self._dataRootDir

    @property
    def rawLocation(self):
        """The directory containing the "raw" input data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'raw')

    @property
    def calibLocation(self):
        """The directory containing the calibration data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'calib')

    @property
    def refcatsLocation(self):
        """The directory containing external astrometric and photometric
        reference catalogs (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'refcats')

    @property
    def templateLocation(self):
        """The directory containing the image subtraction templates (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'templates')

    @property
    def configLocation(self):
        """The directory containing configs that can be used to process the data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'config')

    @property
    def obsPackage(self):
        """The name of the obs package associated with this data (`str`, read-only).
        """
        return dafPersistence.Butler.getMapperClass(self._stubInputRepo).getPackageName()

    @property
    def camera(self):
        """The name of the Gen 2 camera associated with this data (`str`, read-only).
        """
        return dafPersistence.Butler.getMapperClass(self._stubInputRepo).getCameraName()

    @property
    def instrument(self):
        """The Gen 3 instrument associated with this data (`lsst.obs.base.Instrument`, read-only).
        """
        butler = dafButler.Butler(self._preloadedRepo, writeable=False)
        instruments = list(butler.registry.queryDataIds('instrument'))
        if len(instruments) != 1:
            raise RuntimeError(f"Dataset does not have exactly one instrument; got {instruments}.")
        else:
            return obsBase.Instrument.fromName(instruments[0]["instrument"], butler.registry)

    @property
    def _stubInputRepo(self):
        """The directory containing the data set's input stub (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'repo')

    @property
    def _preloadedRepo(self):
        """The directory containing the pre-ingested Gen 3 repo (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'preloaded')

    @property
    def _preloadedExport(self):
        """The file containing an exported registry of `_preloadedRepo` (`str`, read-only).
        """
        return os.path.join(self.configLocation, 'export.yaml')

    def _validatePackage(self):
        """Confirm that the dataset directory satisfies all assumptions.

        Raises
        ------
        RuntimeError
            Raised if the package represented by this object does not conform to the
            dataset framework

        Notes
        -----
        Requires that `self._dataRootDir` has been initialized.
        """
        if not os.path.exists(self.datasetRoot):
            raise RuntimeError('Could not find dataset at ' + self.datasetRoot)
        if not os.path.exists(self.rawLocation):
            raise RuntimeError('Dataset at ' + self.datasetRoot + ' is missing data directory')
        if not os.path.exists(self.calibLocation):
            raise RuntimeError('Dataset at ' + self.datasetRoot + ' is missing calibration directory')
        # Template and refcat directories might not be subdirectories of self.datasetRoot
        if not os.path.exists(self.templateLocation):
            raise RuntimeError('Dataset is missing template directory at ' + self.templateLocation)
        if not os.path.exists(self.refcatsLocation):
            raise RuntimeError('Dataset is missing reference catalog directory at ' + self.refcatsLocation)
        if not os.path.exists(self._stubInputRepo):
            raise RuntimeError('Dataset at ' + self.datasetRoot + ' is missing stub repo')
        if not _isRepo(self._stubInputRepo):
            raise RuntimeError('Stub repo at ' + self._stubInputRepo + ' is missing mapper file')

    def __eq__(self, other):
        """Test that two Dataset objects are equal.

        Two objects are equal iff they refer to the same ap_verify dataset.
        """
        return self.datasetRoot == other.datasetRoot

    def __repr__(self):
        """A string representation that can be used to reconstruct the dataset.
        """
        return f"Dataset({self._id!r})"

    def makeCompatibleRepo(self, repoDir, calibRepoDir):
        """Set up a directory as a Gen 2 repository compatible with this ap_verify dataset.

        If the directory already exists, any files required by the dataset will
        be added if absent; otherwise the directory will remain unchanged.

        Parameters
        ----------
        repoDir : `str`
            The directory where the output repository will be created.
        calibRepoDir : `str`
            The directory where the output calibration repository will be created.
        """
        mapperArgs = {'mapperArgs': {'calibRoot': calibRepoDir}}
        if _isRepo(self.templateLocation):
            # Stub repo is not a parent because can't mix v1 and v2 repositories in parents list
            dafPersistence.Butler(inputs=[{"root": self.templateLocation, "mode": "r"}],
                                  outputs=[{"root": repoDir, "mode": "rw", **mapperArgs}])
        else:
            dafPersistence.Butler(inputs=[{"root": self._stubInputRepo, "mode": "r"}],
                                  outputs=[{"root": repoDir, "mode": "rw", **mapperArgs}])

    def makeCompatibleRepoGen3(self, repoDir):
        """Set up a directory as a Gen 3 repository compatible with this ap_verify dataset.

        If the repository already exists, this call has no effect.

        Parameters
        ----------
        repoDir : `str`
            The directory where the output repository will be created.
        """
        # No way to tell makeRepo "create only what's missing"
        try:
            seedConfig = dafButler.Config()
            # Checksums greatly slow importing of large repositories
            seedConfig["datastore", "checksum"] = False
            repoConfig = dafButler.Butler.makeRepo(repoDir, config=seedConfig)
            butler = dafButler.Butler(repoConfig, writeable=True)
            butler.import_(directory=self._preloadedRepo, filename=self._preloadedExport,
                           transfer="auto")
        except FileExistsError:
            pass

299 

300 

def _isRepo(repoDir):
    """Test whether a directory has been set up as a repository.
    """
    # Gen 2 repositories are marked by either a '_mapper' file or a
    # 'repositoryCfg.yaml' file at their root.
    markers = ('_mapper', 'repositoryCfg.yaml')
    return any(os.path.exists(os.path.join(repoDir, marker)) for marker in markers)