Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# 

2# This file is part of ap_verify. 

3# 

4# Developed for the LSST Data Management System. 

5# This product includes software developed by the LSST Project 

6# (http://www.lsst.org). 

7# See the COPYRIGHT file at the top-level directory of this distribution 

8# for details of code ownership. 

9# 

10# This program is free software: you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation, either version 3 of the License, or 

13# (at your option) any later version. 

14# 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19# 

20# You should have received a copy of the GNU General Public License 

21# along with this program. If not, see <http://www.gnu.org/licenses/>. 

22# 

23 

24import os 

25 

26import lsst.daf.persistence as dafPersistence 

27import lsst.daf.butler as dafButler 

28import lsst.obs.base as obsBase 

29import lsst.pex.exceptions as pexExcept 

30from lsst.utils import getPackageDir 

31 

32from .config import Config 

33 

34 

class Dataset:
    """A dataset supported by ``ap_verify``.

    Any object of this class is guaranteed to represent a ready-for-use
    ap_verify dataset, barring concurrent changes to the file system or EUPS
    operations. Constructing a Dataset does not create a compatible output
    repository(ies), which can be done by calling `makeCompatibleRepo`.

    Parameters
    ----------
    datasetId : `str`
        A tag identifying the dataset.

    Raises
    ------
    RuntimeError
        Raised if ``datasetId`` exists, but is not correctly organized or
        is incomplete.
    ValueError
        Raised if ``datasetId`` is not a recognized ap_verify dataset. No
        side effects if this exception is raised.
    """

    def __init__(self, datasetId):
        self._id = datasetId
        # daf.persistence.Policy's behavior on missing keys is apparently
        # undefined; test for __getattr__ *either* raising KeyError or
        # returning None.
        try:
            datasetPackage = self._getDatasetInfo()[datasetId]
            if datasetPackage is None:
                raise KeyError
        except KeyError:
            raise ValueError('Unsupported dataset: ' + datasetId)

        try:
            self._dataRootDir = getPackageDir(datasetPackage)
        except pexExcept.NotFoundError as e:
            error = 'Dataset %s requires the %s package, which has not been set up.' \
                % (datasetId, datasetPackage)
            raise RuntimeError(error) from e
        else:
            self._validatePackage()

        self._initPackage(datasetPackage)

    def _initPackage(self, name):
        """Prepare the package backing this ap_verify dataset.

        Parameters
        ----------
        name : `str`
            The EUPS package identifier for the desired package.
        """
        # No initialization required at present
        pass

    @staticmethod
    def getSupportedDatasets():
        """The ap_verify dataset IDs that can be passed to this class's
        constructor.

        Returns
        -------
        datasets : `set` of `str`
            The set of IDs that will be accepted.

        Raises
        ------
        IOError
            Raised if the config file does not exist or is not readable.
        RuntimeError
            Raised if the config file exists, but does not contain the
            expected data.
        """
        return Dataset._getDatasetInfo().keys()

    @staticmethod
    def _getDatasetInfo():
        """Return external data on supported ap_verify datasets.

        If an exception is raised, the program state shall be unchanged.

        Returns
        -------
        datasetToPackage : `dict`-like
            A map from dataset IDs to package names.

        Raises
        ------
        RuntimeError
            Raised if the config file exists, but does not contain the
            expected data.
        """
        return Config.instance['datasets']

    @property
    def datasetRoot(self):
        """The parent directory containing everything related to the
        ap_verify dataset (`str`, read-only).
        """
        return self._dataRootDir

    @property
    def rawLocation(self):
        """The directory containing the "raw" input data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'raw')

    @property
    def calibLocation(self):
        """The directory containing the calibration data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'calib')

    @property
    def refcatsLocation(self):
        """The directory containing external astrometric and photometric
        reference catalogs (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'refcats')

    @property
    def templateLocation(self):
        """The directory containing the image subtraction templates (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'templates')

    @property
    def configLocation(self):
        """The directory containing configs that can be used to process the data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'config')

    @property
    def obsPackage(self):
        """The name of the obs package associated with this data (`str`, read-only).
        """
        return dafPersistence.Butler.getMapperClass(self._stubInputRepo).getPackageName()

    @property
    def camera(self):
        """The name of the Gen 2 camera associated with this data (`str`, read-only).
        """
        return dafPersistence.Butler.getMapperClass(self._stubInputRepo).getCameraName()

    @property
    def instrument(self):
        """The Gen 3 instrument associated with this data
        (`lsst.obs.base.Instrument`, read-only).
        """
        butler = dafButler.Butler(self._preloadedRepo, writeable=False)
        instruments = list(butler.registry.queryDataIds('instrument'))
        if len(instruments) != 1:
            raise RuntimeError(f"Dataset does not have exactly one instrument; got {instruments}.")
        else:
            return obsBase.Instrument.fromName(instruments[0]["instrument"], butler.registry)

    @property
    def _stubInputRepo(self):
        """The directory containing the data set's input stub (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'repo')

    @property
    def _preloadedRepo(self):
        """The directory containing the pre-ingested Gen 3 repo (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'preloaded')

    @property
    def _preloadedExport(self):
        """The file containing an exported registry of `_preloadedRepo` (`str`, read-only).
        """
        return os.path.join(self.configLocation, 'export.yaml')

    def _validatePackage(self):
        """Confirm that the dataset directory satisfies all assumptions.

        Raises
        ------
        RuntimeError
            Raised if the package represented by this object does not
            conform to the dataset framework.

        Notes
        -----
        Requires that `self._dataRootDir` has been initialized.
        """
        # BUGFIX: the original messages below lacked a space before "is",
        # producing e.g. "Dataset at /path/to/rootis missing data directory".
        if not os.path.exists(self.datasetRoot):
            raise RuntimeError('Could not find dataset at ' + self.datasetRoot)
        if not os.path.exists(self.rawLocation):
            raise RuntimeError('Dataset at ' + self.datasetRoot + ' is missing data directory')
        if not os.path.exists(self.calibLocation):
            raise RuntimeError('Dataset at ' + self.datasetRoot + ' is missing calibration directory')
        # Template and refcat directories might not be subdirectories of self.datasetRoot
        if not os.path.exists(self.templateLocation):
            raise RuntimeError('Dataset is missing template directory at ' + self.templateLocation)
        if not os.path.exists(self.refcatsLocation):
            raise RuntimeError('Dataset is missing reference catalog directory at ' + self.refcatsLocation)
        if not os.path.exists(self._stubInputRepo):
            raise RuntimeError('Dataset at ' + self.datasetRoot + ' is missing stub repo')
        if not _isRepo(self._stubInputRepo):
            raise RuntimeError('Stub repo at ' + self._stubInputRepo + ' is missing mapper file')

    def __eq__(self, other):
        """Test that two Dataset objects are equal.

        Two objects are equal iff they refer to the same ap_verify dataset.
        """
        # Return NotImplemented for foreign types instead of raising
        # AttributeError on the missing datasetRoot attribute.
        if not isinstance(other, Dataset):
            return NotImplemented
        return self.datasetRoot == other.datasetRoot

    def __repr__(self):
        """A string representation that can be used to reconstruct the dataset.
        """
        return f"Dataset({self._id!r})"

    def makeCompatibleRepo(self, repoDir, calibRepoDir):
        """Set up a directory as a Gen 2 repository compatible with this ap_verify dataset.

        If the directory already exists, any files required by the dataset will
        be added if absent; otherwise the directory will remain unchanged.

        Parameters
        ----------
        repoDir : `str`
            The directory where the output repository will be created.
        calibRepoDir : `str`
            The directory where the output calibration repository will be created.
        """
        mapperArgs = {'mapperArgs': {'calibRoot': calibRepoDir}}
        if _isRepo(self.templateLocation):
            # Stub repo is not a parent because can't mix v1 and v2 repositories in parents list
            dafPersistence.Butler(inputs=[{"root": self.templateLocation, "mode": "r"}],
                                  outputs=[{"root": repoDir, "mode": "rw", **mapperArgs}])
        else:
            dafPersistence.Butler(inputs=[{"root": self._stubInputRepo, "mode": "r"}],
                                  outputs=[{"root": repoDir, "mode": "rw", **mapperArgs}])

    def makeCompatibleRepoGen3(self, repoDir):
        """Set up a directory as a Gen 3 repository compatible with this ap_verify dataset.

        If the repository already exists, this call has no effect.

        Parameters
        ----------
        repoDir : `str`
            The directory where the output repository will be created.
        """
        # No way to tell makeRepo "create only what's missing"
        try:
            seedConfig = dafButler.Config()
            # Checksums greatly slow importing of large repositories
            seedConfig["datastore", "checksum"] = False
            repoConfig = dafButler.Butler.makeRepo(repoDir, config=seedConfig)
            butler = dafButler.Butler(repoConfig, writeable=True)
            butler.import_(directory=self._preloadedRepo, filename=self._preloadedExport,
                           transfer="auto")
        except FileExistsError:
            pass

290 

291def _isRepo(repoDir): 

292 """Test whether a directory has been set up as a repository. 

293 """ 

294 return os.path.exists(os.path.join(repoDir, '_mapper')) \ 

295 or os.path.exists(os.path.join(repoDir, 'repositoryCfg.yaml'))