Coverage for python/lsst/ap/verify/dataset.py: 37%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

96 statements  

1# 

2# This file is part of ap_verify. 

3# 

4# Developed for the LSST Data Management System. 

5# This product includes software developed by the LSST Project 

6# (http://www.lsst.org). 

7# See the COPYRIGHT file at the top-level directory of this distribution 

8# for details of code ownership. 

9# 

10# This program is free software: you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation, either version 3 of the License, or 

13# (at your option) any later version. 

14# 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19# 

20# You should have received a copy of the GNU General Public License 

21# along with this program. If not, see <http://www.gnu.org/licenses/>. 

22# 

23 

24__all__ = ["Dataset"] 

25 

26import os 

27 

28import lsst.daf.persistence as dafPersistence 

29import lsst.daf.butler as dafButler 

30import lsst.obs.base as obsBase 

31from lsst.utils import getPackageDir 

32 

33 

class Dataset:
    """A dataset supported by ``ap_verify``.

    Any object of this class is guaranteed to represent a ready-for-use
    ap_verify dataset, barring concurrent changes to the file system or EUPS
    operations. Constructing a Dataset does not create a compatible output
    repository(ies), which can be done by calling `makeCompatibleRepo`.

    Parameters
    ----------
    datasetId : `str`
        The name of the dataset package. A tag identifying the dataset is also
        accepted, but this usage is deprecated.

    Raises
    ------
    RuntimeError
        Raised if ``datasetId`` exists, but is not correctly organized or is
        incomplete.
    ValueError
        Raised if ``datasetId`` could not be loaded.
    """

    def __init__(self, datasetId):
        # Keep the raw identifier so __repr__ can reconstruct this object.
        self._id = datasetId

        try:
            # The dataset must be a set-up EUPS package; its install
            # directory is the dataset root.
            self._dataRootDir = getPackageDir(datasetId)
        except LookupError as e:
            error = f"Cannot find the {datasetId} package; is it set up?"
            raise ValueError(error) from e
        else:
            self._validatePackage()

        self._initPackage(datasetId)

    def _initPackage(self, name):
        """Prepare the package backing this ap_verify dataset.

        Parameters
        ----------
        name : `str`
            The EUPS package identifier for the desired package.
        """
        # No initialization required at present
        pass

    @property
    def datasetRoot(self):
        """The parent directory containing everything related to the
        ap_verify dataset (`str`, read-only).
        """
        return self._dataRootDir

    @property
    def rawLocation(self):
        """The directory containing the "raw" input data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'raw')

    @property
    def calibLocation(self):
        """The directory containing the calibration data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'calib')

    @property
    def refcatsLocation(self):
        """The directory containing external astrometric and photometric
        reference catalogs (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'refcats')

    @property
    def templateLocation(self):
        """The directory containing the image subtraction templates (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'templates')

    @property
    def configLocation(self):
        """The directory containing configs that can be used to process the data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'config')

    @property
    def pipelineLocation(self):
        """The directory containing pipelines that can be used to process the
        data in Gen 3 (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'pipelines')

    @property
    def obsPackage(self):
        """The name of the obs package associated with this data (`str`, read-only).
        """
        return dafPersistence.Butler.getMapperClass(self._stubInputRepo).getPackageName()

    @property
    def camera(self):
        """The name of the Gen 2 camera associated with this data (`str`, read-only).
        """
        return dafPersistence.Butler.getMapperClass(self._stubInputRepo).getCameraName()

    @property
    def instrument(self):
        """The Gen 3 instrument associated with this data (`lsst.obs.base.Instrument`, read-only).

        Raises
        ------
        RuntimeError
            Raised if the preloaded repository does not contain exactly
            one instrument.
        """
        butler = dafButler.Butler(self._preloadedRepo, writeable=False)
        instruments = list(butler.registry.queryDataIds('instrument'))
        if len(instruments) != 1:
            raise RuntimeError(f"Dataset does not have exactly one instrument; got {instruments}.")
        else:
            return obsBase.Instrument.fromName(instruments[0]["instrument"], butler.registry)

    @property
    def _stubInputRepo(self):
        """The directory containing the data set's input stub (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'repo')

    @property
    def _preloadedRepo(self):
        """The directory containing the pre-ingested Gen 3 repo (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'preloaded')

    @property
    def _preloadedExport(self):
        """The file containing an exported registry of `_preloadedRepo` (`str`, read-only).
        """
        return os.path.join(self.configLocation, 'export.yaml')

    def _validatePackage(self):
        """Confirm that the dataset directory satisfies all assumptions.

        Raises
        ------
        RuntimeError
            Raised if the package represented by this object does not conform
            to the dataset framework.

        Notes
        -----
        Requires that `self._dataRootDir` has been initialized.
        """
        # Error messages use f-strings with a space after the path; the old
        # concatenated form produced run-together text like "...rootis missing".
        if not os.path.exists(self.datasetRoot):
            raise RuntimeError(f'Could not find dataset at {self.datasetRoot}')
        if not os.path.exists(self.rawLocation):
            raise RuntimeError(f'Dataset at {self.datasetRoot} is missing data directory')
        if not os.path.exists(self.calibLocation):
            raise RuntimeError(f'Dataset at {self.datasetRoot} is missing calibration directory')
        # Template and refcat directories might not be subdirectories of self.datasetRoot
        if not os.path.exists(self.templateLocation):
            raise RuntimeError(f'Dataset is missing template directory at {self.templateLocation}')
        if not os.path.exists(self.refcatsLocation):
            raise RuntimeError(f'Dataset is missing reference catalog directory at {self.refcatsLocation}')
        if not os.path.exists(self._stubInputRepo):
            raise RuntimeError(f'Dataset at {self.datasetRoot} is missing stub repo')
        if not _isRepo(self._stubInputRepo):
            raise RuntimeError(f'Stub repo at {self._stubInputRepo} is missing mapper file')

    def __eq__(self, other):
        """Test that two Dataset objects are equal.

        Two objects are equal iff they refer to the same ap_verify dataset.
        """
        # Defer to the other operand (instead of raising AttributeError)
        # when compared against a non-Dataset, per the Python data model.
        if not isinstance(other, Dataset):
            return NotImplemented
        return self.datasetRoot == other.datasetRoot

    def __repr__(self):
        """A string representation that can be used to reconstruct the dataset.
        """
        return f"Dataset({self._id!r})"

    def makeCompatibleRepo(self, repoDir, calibRepoDir):
        """Set up a directory as a Gen 2 repository compatible with this ap_verify dataset.

        If the directory already exists, any files required by the dataset will
        be added if absent; otherwise the directory will remain unchanged.

        Parameters
        ----------
        repoDir : `str`
            The directory where the output repository will be created.
        calibRepoDir : `str`
            The directory where the output calibration repository will be created.
        """
        mapperArgs = {'mapperArgs': {'calibRoot': calibRepoDir}}
        if _isRepo(self.templateLocation):
            # Stub repo is not a parent because can't mix v1 and v2 repositories in parents list
            dafPersistence.Butler(inputs=[{"root": self.templateLocation, "mode": "r"}],
                                  outputs=[{"root": repoDir, "mode": "rw", **mapperArgs}])
        else:
            dafPersistence.Butler(inputs=[{"root": self._stubInputRepo, "mode": "r"}],
                                  outputs=[{"root": repoDir, "mode": "rw", **mapperArgs}])

    def makeCompatibleRepoGen3(self, repoDir):
        """Set up a directory as a Gen 3 repository compatible with this ap_verify dataset.

        If the repository already exists, this call has no effect.

        Parameters
        ----------
        repoDir : `str`
            The directory where the output repository will be created.
        """
        # No way to tell makeRepo "create only what's missing"
        try:
            seedConfig = dafButler.Config()
            # Checksums greatly slow importing of large repositories
            seedConfig["datastore", "checksum"] = False
            repoConfig = dafButler.Butler.makeRepo(repoDir, config=seedConfig)
            butler = dafButler.Butler(repoConfig, writeable=True)
            butler.import_(directory=self._preloadedRepo, filename=self._preloadedExport,
                           transfer="auto")
        except FileExistsError:
            pass

250 

251 

def _isRepo(repoDir):
    """Test whether a directory has been set up as a repository.

    Parameters
    ----------
    repoDir : `str`
        The directory to examine.

    Returns
    -------
    isRepo : `bool`
        `True` if ``repoDir`` contains either a Gen 2 ``_mapper`` file or a
        ``repositoryCfg.yaml`` file, `False` otherwise.
    """
    # Either marker file identifies the directory as a butler repository.
    markers = ('_mapper', 'repositoryCfg.yaml')
    return any(os.path.exists(os.path.join(repoDir, marker)) for marker in markers)