Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

21 

22from __future__ import annotations 

23 

24import os 

25import shutil 

26import tempfile 

27from typing import ( 

28 Any, 

29 Iterable, 

30 Mapping, 

31 Optional, 

32 Tuple, 

33) 

34import unittest 

35import unittest.mock 

36 

37import astropy.time 

38 

39from lsst.daf.butler import ( 

40 Butler, 

41 ButlerConfig, 

42 CollectionType, 

43 DatasetRef, 

44 Datastore, 

45 FileDataset, 

46 Registry, 

47 Timespan, 

48) 

49from lsst.daf.butler.registry import RegistryConfig 

50 

51 

# Absolute path of the directory containing this test module; used below to
# locate the exported-registry YAML files under ``data/registry``.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

53 

54 

def _mock_export(refs: Iterable[DatasetRef], *,
                 directory: Optional[str] = None,
                 transfer: Optional[str] = None) -> Iterable[FileDataset]:
    """A mock of `Datastore.export` that satisfies the requirement that the
    refs passed in are included in the `FileDataset` objects returned.

    This can be used to construct a `Datastore` mock that can be used in
    repository export via::

        datastore = unittest.mock.Mock(spec=Datastore)
        datastore.export = _mock_export

    """
    # Yield one FileDataset per ref; the directory/transfer arguments are
    # accepted for signature compatibility but deliberately ignored.
    for single_ref in refs:
        dataset = FileDataset(
            refs=[single_ref],
            path="mock/path",
            formatter="lsst.daf.butler.formatters.json.JsonFormatter",
        )
        yield dataset

72 

73 

74def _mock_get(ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None 

75 ) -> Tuple[int, Optional[Mapping[str, Any]]]: 

76 """A mock of `Datastore.get` that just returns the integer dataset ID value 

77 and parameters it was given. 

78 """ 

79 return (ref.id, parameters) 

80 

81 

class SimpleButlerTestCase(unittest.TestCase):
    """Tests for butler (including import/export functionality) that should
    not depend on the Registry Database backend or Datastore implementation,
    and can instead utilize an in-memory SQLite Registry and a mocked
    Datastore.
    """

    def setUp(self) -> None:
        # Scratch directory for this test; removed again in tearDown.
        self.root = tempfile.mkdtemp()

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def makeButler(self, **kwargs: Any) -> Butler:
        """Return new Butler instance on each call.

        Parameters
        ----------
        **kwargs
            Additional keyword arguments forwarded to the `Butler`
            constructor (e.g. ``writeable=True``).

        Returns
        -------
        butler : `Butler`
            A Butler backed by its own SQLite registry in a fresh temporary
            directory, with ``datastore.export`` and ``datastore.get``
            replaced by the module-level mocks.
        """
        config = ButlerConfig()

        # make separate temporary directory for registry of this instance
        tmpdir = tempfile.mkdtemp(dir=self.root)
        config["registry", "db"] = f"sqlite:///{tmpdir}/gen3.sqlite3"

        # have to make a registry first
        registryConfig = RegistryConfig(config.get("registry"))
        Registry.createFromConfig(registryConfig)

        # Patch Datastore.fromConfig so Butler construction never creates a
        # real datastore, then install the lightweight module-level mocks.
        with unittest.mock.patch.object(Datastore, "fromConfig", spec=Datastore.fromConfig):
            butler = Butler(config, **kwargs)
        butler.datastore.export = _mock_export
        butler.datastore.get = _mock_get
        return butler

    def _certifyBiases(self, registry: Registry, collection: str):
        """Certify the four imported bias datasets into a CALIBRATION
        collection.

        Assumes ``base.yaml`` and ``datasets.yaml`` have already been
        imported and that ``collection`` is already registered with
        `CollectionType.CALIBRATION`.

        Parameters
        ----------
        registry : `Registry`
            Registry to operate on.
        collection : `str`
            Name of the CALIBRATION collection to certify into.

        Returns
        -------
        times : `tuple`
            The three validity-range boundary times ``(t1, t2, t3)``.
        biases : `tuple` [ `DatasetRef` ]
            The certified datasets ``(bias2a, bias3a, bias2b, bias3b)``.
        """
        t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
        t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
        t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Validity ranges deliberately overlap at t2 boundaries and include
        # one open-ended range (bias2b) to exercise Timespan handling.
        registry.certify(collection, [bias2a, bias3a], Timespan(t1, t2))
        registry.certify(collection, [bias2b], Timespan(t2, None))
        registry.certify(collection, [bias3b], Timespan(t2, t3))
        return (t1, t2, t3), (bias2a, bias3a, bias2b, bias3b)

    def testReadBackwardsCompatibility(self):
        """Test that we can read an export file written by a previous version
        and committed to the daf_butler git repo.

        Notes
        -----
        At present this export file includes only dimension data, not
        datasets, which greatly limits the usefulness of this test.  We
        should address this at some point, but I think it's best to wait for
        the changes to the export format required for CALIBRATION
        collections to land.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Spot-check a few things, but the most important test is just that
        # the above does not raise.
        self.assertGreaterEqual(
            {record.id for record in butler.registry.queryDimensionRecords("detector", instrument="HSC")},
            set(range(104)),  # should have all science CCDs; may have some focus ones.
        )
        self.assertGreaterEqual(
            {
                (record.id, record.physical_filter)
                for record in butler.registry.queryDimensionRecords("visit", instrument="HSC")
            },
            {
                (27136, 'HSC-Z'),
                (11694, 'HSC-G'),
                (23910, 'HSC-R'),
                (11720, 'HSC-Y'),
                (23900, 'HSC-R'),
                (22646, 'HSC-Y'),
                (1248, 'HSC-I'),
                (19680, 'HSC-I'),
                (1240, 'HSC-I'),
                (424, 'HSC-Y'),
                (19658, 'HSC-I'),
                (344, 'HSC-Y'),
                (1218, 'HSC-R'),
                (1190, 'HSC-Z'),
                (23718, 'HSC-R'),
                (11700, 'HSC-G'),
                (26036, 'HSC-G'),
                (23872, 'HSC-R'),
                (1170, 'HSC-Z'),
                (1876, 'HSC-Y'),
            }
        )

    def testDatasetTransfers(self):
        """Test exporting all datasets from a repo and then importing them
        all back in again.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))
        with tempfile.NamedTemporaryFile(mode='w', suffix=".yaml") as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(
                    butler1.registry.queryDatasets(..., collections=...)
                )
            # Import it all again.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        # Check that it all round-tripped.  Use unresolved() to make
        # comparison not care about dataset_id values, which may be
        # rewritten.
        self.assertCountEqual(
            [ref.unresolved() for ref in butler1.registry.queryDatasets(..., collections=...)],
            [ref.unresolved() for ref in butler2.registry.queryDatasets(..., collections=...)],
        )

    def testCollectionTransfers(self):
        """Test exporting and then importing collections of various types.
        """
        # Populate a registry with some datasets.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))
        registry1 = butler1.registry
        # Add some more collections.
        registry1.registerRun("run1")
        registry1.registerCollection("tag1", CollectionType.TAGGED)
        registry1.registerCollection("calibration1", CollectionType.CALIBRATION)
        registry1.registerCollection("chain1", CollectionType.CHAINED)
        registry1.registerCollection("chain2", CollectionType.CHAINED)
        registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
        registry1.setCollectionChain("chain2", ["calibration1", "run1"])
        # Associate some datasets into the TAGGED and CALIBRATION collections.
        flats1 = list(registry1.queryDatasets("flat", collections=...))
        registry1.associate("tag1", flats1)
        _, (bias2a, bias3a, bias2b, bias3b) = self._certifyBiases(registry1, "calibration1")

        with tempfile.NamedTemporaryFile(mode='w', suffix=".yaml") as file:
            # Export all collections, and some datasets.
            with butler1.export(filename=file.name) as exporter:
                # Sort results to put chain1 before chain2, which is
                # intentionally not topological order.
                for collection in sorted(registry1.queryCollections()):
                    exporter.saveCollection(collection)
                exporter.saveDatasets(flats1)
                exporter.saveDatasets([bias2a, bias2b, bias3a, bias3b])
            # Import them into a new registry.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        registry2 = butler2.registry
        # Check that it all round-tripped, starting with the collections
        # themselves.
        self.assertIs(registry2.getCollectionType("run1"), CollectionType.RUN)
        self.assertIs(registry2.getCollectionType("tag1"), CollectionType.TAGGED)
        self.assertIs(registry2.getCollectionType("calibration1"), CollectionType.CALIBRATION)
        self.assertIs(registry2.getCollectionType("chain1"), CollectionType.CHAINED)
        self.assertIs(registry2.getCollectionType("chain2"), CollectionType.CHAINED)
        self.assertEqual(
            list(registry2.getCollectionChain("chain1")),
            ["tag1", "run1", "chain2"],
        )
        self.assertEqual(
            list(registry2.getCollectionChain("chain2")),
            ["calibration1", "run1"],
        )
        # Check that tag collection contents are the same.
        self.maxDiff = None
        self.assertCountEqual(
            [ref.unresolved() for ref in registry1.queryDatasets(..., collections="tag1")],
            [ref.unresolved() for ref in registry2.queryDatasets(..., collections="tag1")],
        )
        # Check that calibration collection contents are the same.
        self.assertCountEqual(
            [(assoc.ref.unresolved(), assoc.timespan)
             for assoc in registry1.queryDatasetAssociations("bias", collections="calibration1")],
            [(assoc.ref.unresolved(), assoc.timespan)
             for assoc in registry2.queryDatasetAssociations("bias", collections="calibration1")],
        )

    def testGetCalibration(self):
        """Test that `Butler.get` can be used to fetch from
        `~CollectionType.CALIBRATION` collections if the data ID includes
        extra dimensions with temporal information.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))
        # Certify some biases into a CALIBRATION collection.
        registry = butler.registry
        registry.registerCollection("calibs", CollectionType.CALIBRATION)
        (t1, t2, t3), (bias2a, bias3a, bias2b, bias3b) = self._certifyBiases(registry, "calibs")
        # Insert some exposure dimension data.
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 3,
                "obs_id": "three",
                "timespan": Timespan(t1, t2),
                "physical_filter": "Cam1-G",
            },
            {
                "instrument": "Cam1",
                "id": 4,
                "obs_id": "four",
                "timespan": Timespan(t2, t3),
                "physical_filter": "Cam1-G",
            },
        )
        # Get some biases from raw-like data IDs.
        bias2a_id, _ = butler.get("bias", {"instrument": "Cam1", "exposure": 3, "detector": 2},
                                  collections="calibs")
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get("bias", {"instrument": "Cam1", "exposure": 4, "detector": 3},
                                  collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Get using the kwarg form
        bias3b_id, _ = butler.get("bias",
                                  instrument="Cam1", exposure=4, detector=3,
                                  collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Do it again but using the record information
        bias2a_id, _ = butler.get("bias", {"instrument": "Cam1", "exposure.obs_id": "three",
                                           "detector.full_name": "Ab"},
                                  collections="calibs")
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get("bias", {"exposure.obs_id": "four",
                                           "detector.full_name": "Ba"},
                                  collections="calibs", instrument="Cam1")
        self.assertEqual(bias3b_id, bias3b.id)

322 

323 

# Allow running this test module directly with ``python`` (unittest's own
# runner); normally it is discovered and run by pytest/scons.
if __name__ == "__main__":
    unittest.main()