Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# 

2# This file is part of ap_verify. 

3# 

4# Developed for the LSST Data Management System. 

5# This product includes software developed by the LSST Project 

6# (http://www.lsst.org). 

7# See the COPYRIGHT file at the top-level directory of this distribution 

8# for details of code ownership. 

9# 

10# This program is free software: you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation, either version 3 of the License, or 

13# (at your option) any later version. 

14# 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19# 

20# You should have received a copy of the GNU General Public License 

21# along with this program. If not, see <http://www.gnu.org/licenses/>. 

22# 

23 

# Public API of this module: only the Dataset class is exported.
__all__ = ["Dataset"]

25 

26import os 

27import warnings 

28 

29from deprecated.sphinx import deprecated 

30 

31import lsst.daf.persistence as dafPersistence 

32import lsst.daf.butler as dafButler 

33import lsst.obs.base as obsBase 

34import lsst.pex.exceptions as pexExcept 

35from lsst.utils import getPackageDir 

36 

37from .config import Config 

38 

39 

class Dataset:
    """A dataset supported by ``ap_verify``.

    Any object of this class is guaranteed to represent a ready-for-use
    ap_verify dataset, barring concurrent changes to the file system or EUPS
    operations. Constructing a Dataset does not create a compatible output
    repository (or repositories), which can be done by calling
    `makeCompatibleRepo`.

    Parameters
    ----------
    datasetId : `str`
        The name of the dataset package. A tag identifying the dataset is also
        accepted, but this usage is deprecated.

    Raises
    ------
    RuntimeError
        Raised if ``datasetId`` exists, but is not correctly organized or incomplete.
    ValueError
        Raised if ``datasetId`` could not be loaded.
    """

    def __init__(self, datasetId):
        self._id = datasetId
        # daf.persistence.Policy's behavior on missing keys is apparently undefined;
        # test for __getattr__ *either* raising KeyError or returning None.
        try:
            datasetPackage = self._getDatasetInfo()[datasetId]
            if datasetPackage is None:
                raise KeyError
            else:
                warnings.warn(f"The {datasetId} name is deprecated, and will be removed after v24.0. "
                              f"Use {datasetPackage} instead.", category=FutureWarning)
        except KeyError:
            # If datasetId is not a known tag, assume it's a package name.
            datasetPackage = datasetId

        try:
            self._dataRootDir = getPackageDir(datasetPackage)
        except pexExcept.NotFoundError as e:
            error = f"Cannot find the {datasetPackage} package; is it set up?"
            raise ValueError(error) from e
        else:
            self._validatePackage()

        self._initPackage(datasetPackage)

    def _initPackage(self, name):
        """Prepare the package backing this ap_verify dataset.

        Parameters
        ----------
        name : `str`
            The EUPS package identifier for the desired package.
        """
        # No initialization required at present
        pass

    # TODO: remove in DM-29042
    @staticmethod
    @deprecated(reason="The concept of 'supported' datasets is deprecated. This "
                "method will be removed after v24.0.", version="v22.0", category=FutureWarning)
    def getSupportedDatasets():
        """The ap_verify dataset IDs that can be passed to this class's constructor.

        Returns
        -------
        datasets : `set` of `str`
            the set of IDs that will be accepted

        Raises
        ------
        IOError
            Raised if the config file does not exist or is not readable
        RuntimeError
            Raised if the config file exists, but does not contain the expected data
        """
        return Dataset._getDatasetInfo().keys()

    # TODO: remove in DM-29042
    @staticmethod
    def _getDatasetInfo():
        """Return external data on supported ap_verify datasets.

        If an exception is raised, the program state shall be unchanged.

        Returns
        -------
        datasetToPackage : `dict`-like
            a map from dataset IDs to package names.

        Raises
        ------
        RuntimeError
            Raised if the config file exists, but does not contain the expected data
        """
        return Config.instance['datasets']

    @property
    def datasetRoot(self):
        """The parent directory containing everything related to the
        ap_verify dataset (`str`, read-only).
        """
        return self._dataRootDir

    @property
    def rawLocation(self):
        """The directory containing the "raw" input data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'raw')

    @property
    def calibLocation(self):
        """The directory containing the calibration data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'calib')

    @property
    def refcatsLocation(self):
        """The directory containing external astrometric and photometric
        reference catalogs (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'refcats')

    @property
    def templateLocation(self):
        """The directory containing the image subtraction templates (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'templates')

    @property
    def configLocation(self):
        """The directory containing configs that can be used to process the data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'config')

    @property
    def obsPackage(self):
        """The name of the obs package associated with this data (`str`, read-only).
        """
        return dafPersistence.Butler.getMapperClass(self._stubInputRepo).getPackageName()

    @property
    def camera(self):
        """The name of the Gen 2 camera associated with this data (`str`, read-only).
        """
        return dafPersistence.Butler.getMapperClass(self._stubInputRepo).getCameraName()

    @property
    def instrument(self):
        """The Gen 3 instrument associated with this data (`lsst.obs.base.Instrument`, read-only).
        """
        butler = dafButler.Butler(self._preloadedRepo, writeable=False)
        instruments = list(butler.registry.queryDataIds('instrument'))
        # An ap_verify dataset is expected to serve exactly one instrument.
        if len(instruments) != 1:
            raise RuntimeError(f"Dataset does not have exactly one instrument; got {instruments}.")
        else:
            return obsBase.Instrument.fromName(instruments[0]["instrument"], butler.registry)

    @property
    def _stubInputRepo(self):
        """The directory containing the data set's input stub (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'repo')

    @property
    def _preloadedRepo(self):
        """The directory containing the pre-ingested Gen 3 repo (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'preloaded')

    @property
    def _preloadedExport(self):
        """The file containing an exported registry of `_preloadedRepo` (`str`, read-only).
        """
        return os.path.join(self.configLocation, 'export.yaml')

    def _validatePackage(self):
        """Confirm that the dataset directory satisfies all assumptions.

        Raises
        ------
        RuntimeError
            Raised if the package represented by this object does not conform to the
            dataset framework

        Notes
        -----
        Requires that `self._dataRootDir` has been initialized.
        """
        # Error messages previously omitted the space before "is", producing
        # run-together paths like ".../datasetis missing ..."; fixed here.
        if not os.path.exists(self.datasetRoot):
            raise RuntimeError(f'Could not find dataset at {self.datasetRoot}')
        if not os.path.exists(self.rawLocation):
            raise RuntimeError(f'Dataset at {self.datasetRoot} is missing data directory')
        if not os.path.exists(self.calibLocation):
            raise RuntimeError(f'Dataset at {self.datasetRoot} is missing calibration directory')
        # Template and refcat directories might not be subdirectories of self.datasetRoot
        if not os.path.exists(self.templateLocation):
            raise RuntimeError(f'Dataset is missing template directory at {self.templateLocation}')
        if not os.path.exists(self.refcatsLocation):
            raise RuntimeError(f'Dataset is missing reference catalog directory at {self.refcatsLocation}')
        if not os.path.exists(self._stubInputRepo):
            raise RuntimeError(f'Dataset at {self.datasetRoot} is missing stub repo')
        if not _isRepo(self._stubInputRepo):
            raise RuntimeError(f'Stub repo at {self._stubInputRepo} is missing mapper file')

    def __eq__(self, other):
        """Test that two Dataset objects are equal.

        Two objects are equal iff they refer to the same ap_verify dataset.
        """
        return self.datasetRoot == other.datasetRoot

    def __repr__(self):
        """A string representation that can be used to reconstruct the dataset.
        """
        return f"Dataset({self._id!r})"

    def makeCompatibleRepo(self, repoDir, calibRepoDir):
        """Set up a directory as a Gen 2 repository compatible with this ap_verify dataset.

        If the directory already exists, any files required by the dataset will
        be added if absent; otherwise the directory will remain unchanged.

        Parameters
        ----------
        repoDir : `str`
            The directory where the output repository will be created.
        calibRepoDir : `str`
            The directory where the output calibration repository will be created.
        """
        mapperArgs = {'mapperArgs': {'calibRoot': calibRepoDir}}
        if _isRepo(self.templateLocation):
            # Stub repo is not a parent because can't mix v1 and v2 repositories in parents list
            dafPersistence.Butler(inputs=[{"root": self.templateLocation, "mode": "r"}],
                                  outputs=[{"root": repoDir, "mode": "rw", **mapperArgs}])
        else:
            dafPersistence.Butler(inputs=[{"root": self._stubInputRepo, "mode": "r"}],
                                  outputs=[{"root": repoDir, "mode": "rw", **mapperArgs}])

    def makeCompatibleRepoGen3(self, repoDir):
        """Set up a directory as a Gen 3 repository compatible with this ap_verify dataset.

        If the repository already exists, this call has no effect.

        Parameters
        ----------
        repoDir : `str`
            The directory where the output repository will be created.
        """
        # No way to tell makeRepo "create only what's missing"
        try:
            seedConfig = dafButler.Config()
            # Checksums greatly slow importing of large repositories
            seedConfig["datastore", "checksum"] = False
            repoConfig = dafButler.Butler.makeRepo(repoDir, config=seedConfig)
            butler = dafButler.Butler(repoConfig, writeable=True)
            butler.import_(directory=self._preloadedRepo, filename=self._preloadedExport,
                           transfer="auto")
        except FileExistsError:
            pass

301 

302 

303def _isRepo(repoDir): 

304 """Test whether a directory has been set up as a repository. 

305 """ 

306 return os.path.exists(os.path.join(repoDir, '_mapper')) \ 

307 or os.path.exists(os.path.join(repoDir, 'repositoryCfg.yaml'))