# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import tempfile
import unittest
from typing import Any

try:
    import numpy as np
except ImportError:
    np = None

import astropy.time

from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionType,
    DatasetType,
    Registry,
    Timespan,
)
from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults
from lsst.daf.butler.tests import DatastoreMock
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir


TESTDIR = os.path.abspath(os.path.dirname(__file__))


class SimpleButlerTestCase(unittest.TestCase):
    """Tests for butler (including import/export functionality) that should
    not depend on the Registry database backend or Datastore implementation,
    and can instead use a SQLite registry in a temporary directory and a
    mocked Datastore.
    """

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.root)

    def makeButler(self, **kwargs: Any) -> Butler:
        """Return a new Butler instance on each call."""
        config = ButlerConfig()

        # Make a separate temporary directory for the registry of this
        # instance.
        tmpdir = tempfile.mkdtemp(dir=self.root)
        config["registry", "db"] = f"sqlite:///{tmpdir}/gen3.sqlite3"
        config["root"] = self.root

        # Have to create a registry first.
        registryConfig = RegistryConfig(config.get("registry"))
        Registry.createFromConfig(registryConfig)

        butler = Butler(config, **kwargs)
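        # With the datastore mocked out, butler.get() does no real I/O; it
        # returns the resolved dataset ID plus any parameters instead of file
        # contents, which is why the tests below unpack its return value into
        # a tuple.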
        DatastoreMock.apply(butler)
        return butler

    def testReadBackwardsCompatibility(self):
        """Test that we can read an export file written by a previous version
        and committed to the daf_butler git repo.

        Notes
        -----
        At present this export file includes only dimension data, not
        datasets, which greatly limits the usefulness of this test.  We should
        address this at some point, but I think it's best to wait for the
        changes to the export format required for CALIBRATION collections to
        land.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Spot-check a few things, but the most important test is just that
        # the above does not raise.
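        # (assertGreaterEqual on sets is a superset check.)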
        self.assertGreaterEqual(
            set(record.id for record in butler.registry.queryDimensionRecords("detector", instrument="HSC")),
            set(range(104)),  # should have all science CCDs; may have some focus ones.
        )
        self.assertGreaterEqual(
            {
                (record.id, record.physical_filter)
                for record in butler.registry.queryDimensionRecords("visit", instrument="HSC")
            },
            {
                (27136, 'HSC-Z'),
                (11694, 'HSC-G'),
                (23910, 'HSC-R'),
                (11720, 'HSC-Y'),
                (23900, 'HSC-R'),
                (22646, 'HSC-Y'),
                (1248, 'HSC-I'),
                (19680, 'HSC-I'),
                (1240, 'HSC-I'),
                (424, 'HSC-Y'),
                (19658, 'HSC-I'),
                (344, 'HSC-Y'),
                (1218, 'HSC-R'),
                (1190, 'HSC-Z'),
                (23718, 'HSC-R'),
                (11700, 'HSC-G'),
                (26036, 'HSC-G'),
                (23872, 'HSC-R'),
                (1170, 'HSC-Z'),
                (1876, 'HSC-Y'),
            },
        )

    def testDatasetTransfers(self):
        """Test exporting all datasets from a repo and then importing them all
        back in again.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))
        with tempfile.NamedTemporaryFile(mode='w', suffix=".yaml") as file:
            # Export all datasets.
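            # (The Ellipsis arguments mean "all dataset types" and "all
            # collections".)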
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(
                    butler1.registry.queryDatasets(..., collections=...)
                )
            # Import it all again.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        # Check that it all round-tripped.  Use unresolved() so the comparison
        # ignores dataset_id values, which may be rewritten.
        self.assertCountEqual(
            [ref.unresolved() for ref in butler1.registry.queryDatasets(..., collections=...)],
            [ref.unresolved() for ref in butler2.registry.queryDatasets(..., collections=...)],
        )

    def testCollectionTransfers(self):
        """Test exporting and then importing collections of various types."""
        # Populate a registry with some datasets.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))
        registry1 = butler1.registry
        # Add some more collections.
        registry1.registerRun("run1")
        registry1.registerCollection("tag1", CollectionType.TAGGED)
        registry1.registerCollection("calibration1", CollectionType.CALIBRATION)
        registry1.registerCollection("chain1", CollectionType.CHAINED)
        registry1.registerCollection("chain2", CollectionType.CHAINED)
        registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
        registry1.setCollectionChain("chain2", ["calibration1", "run1"])
        # Associate some datasets into the TAGGED and CALIBRATION collections.
        flats1 = list(registry1.queryDatasets("flat", collections=...))
        registry1.associate("tag1", flats1)
        t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
        t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
        t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
        bias2a = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
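        # Certify the biases with staggered validity ranges (detector 2's
        # second bias is open-ended, detector 3's is bounded) so the
        # CALIBRATION collection has non-trivial temporal content to
        # round-trip.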
        registry1.certify("calibration1", [bias2a, bias3a], Timespan(t1, t2))
        registry1.certify("calibration1", [bias2b], Timespan(t2, None))
        registry1.certify("calibration1", [bias3b], Timespan(t2, t3))

        with tempfile.NamedTemporaryFile(mode='w', suffix=".yaml") as file:
            # Export all collections, and some datasets.
            with butler1.export(filename=file.name) as exporter:
                # Sort results to put chain1 before chain2, which is
                # intentionally not topological order.
                for collection in sorted(registry1.queryCollections()):
                    exporter.saveCollection(collection)
                exporter.saveDatasets(flats1)
                exporter.saveDatasets([bias2a, bias2b, bias3a, bias3b])
            # Import them into a new registry.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        registry2 = butler2.registry
        # Check that it all round-tripped, starting with the collections
        # themselves.
        self.assertIs(registry2.getCollectionType("run1"), CollectionType.RUN)
        self.assertIs(registry2.getCollectionType("tag1"), CollectionType.TAGGED)
        self.assertIs(registry2.getCollectionType("calibration1"), CollectionType.CALIBRATION)
        self.assertIs(registry2.getCollectionType("chain1"), CollectionType.CHAINED)
        self.assertIs(registry2.getCollectionType("chain2"), CollectionType.CHAINED)
        self.assertEqual(
            list(registry2.getCollectionChain("chain1")),
            ["tag1", "run1", "chain2"],
        )
        self.assertEqual(
            list(registry2.getCollectionChain("chain2")),
            ["calibration1", "run1"],
        )
        # Check that tag collection contents are the same.
        self.maxDiff = None
        self.assertCountEqual(
            [ref.unresolved() for ref in registry1.queryDatasets(..., collections="tag1")],
            [ref.unresolved() for ref in registry2.queryDatasets(..., collections="tag1")],
        )
        # Check that calibration collection contents are the same.
        self.assertCountEqual(
            [(assoc.ref.unresolved(), assoc.timespan)
             for assoc in registry1.queryDatasetAssociations("bias", collections="calibration1")],
            [(assoc.ref.unresolved(), assoc.timespan)
             for assoc in registry2.queryDatasetAssociations("bias", collections="calibration1")],
        )

    def testButlerGet(self):
        """Test that butler.get works with different dataId and
        keyword-argument variants.
        """

        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))

        # Find the DatasetRef for a flat.
        coll = "imported_g"
        flat2g = butler.registry.findDataset("flat", instrument="Cam1", detector=2, physical_filter="Cam1-G",
                                             collections=coll)

        # Create a numpy integer to check that it works fine.
        detector_np = np.int64(2) if np else 2
        print(type(detector_np))

        # Try to get it using different variations of dataId + keyword
        # arguments.
        # Note that instrument.class_name does not work.
        variants = (
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            (None, {"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}),
            ({"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": 2}, {"physical_filter": "Cam1-G"}),
            ({"detector.full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"full_name": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b", "raft": "A"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b"}, {"raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"name_in_raft": "b", "raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"detector.name_in_raft": "b", "detector.raft": "A"},
             {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"detector.name_in_raft": "b", "detector.raft": "A",
              "instrument": "Cam1", "physical_filter": "Cam1-G"}, {}),
        )

        for dataId, kwds in variants:
            try:
                flat_id, _ = butler.get("flat", dataId=dataId, collections=coll, **kwds)
            except Exception as e:
                raise type(e)(f"{str(e)}: dataId={dataId}, kwds={kwds}") from e
            self.assertEqual(flat_id, flat2g.id, msg=f"DataId: {dataId}, kwds: {kwds}")

    def testGetCalibration(self):
        """Test that `Butler.get` can be used to fetch from
        `~CollectionType.CALIBRATION` collections if the data ID includes
        extra dimensions with temporal information.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))
        # Certify some biases into a CALIBRATION collection.
        registry = butler.registry
        registry.registerCollection("calibs", CollectionType.CALIBRATION)
        t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
        t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
        t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry.certify("calibs", [bias2a, bias3a], Timespan(t1, t2))
        registry.certify("calibs", [bias2b], Timespan(t2, None))
        registry.certify("calibs", [bias3b], Timespan(t2, t3))
        # Insert some exposure dimension data.
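        # The exposure timespans (t1-t2 for exposure 3, t2-t3 for exposure 4)
        # are what let Butler.get below pick the matching certified bias.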
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 3,
                "obs_id": "three",
                "timespan": Timespan(t1, t2),
                "physical_filter": "Cam1-G",
                "day_obs": 20201114,
                "seq_num": 55,
            },
            {
                "instrument": "Cam1",
                "id": 4,
                "obs_id": "four",
                "timespan": Timespan(t2, t3),
                "physical_filter": "Cam1-G",
                "day_obs": 20211114,
                "seq_num": 42,
            },
        )
        # Get some biases from raw-like data IDs.
        bias2a_id, _ = butler.get("bias", {"instrument": "Cam1", "exposure": 3, "detector": 2},
                                  collections="calibs")
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get("bias", {"instrument": "Cam1", "exposure": 4, "detector": 3},
                                  collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Get using the kwarg form.
        bias3b_id, _ = butler.get("bias",
                                  instrument="Cam1", exposure=4, detector=3,
                                  collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Do it again but using the record information.
        bias2a_id, _ = butler.get("bias", {"instrument": "Cam1", "exposure.obs_id": "three",
                                           "detector.full_name": "Ab"},
                                  collections="calibs")
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get("bias", {"exposure.obs_id": "four",
                                           "detector.full_name": "Ba"},
                                  collections="calibs", instrument="Cam1")
        self.assertEqual(bias3b_id, bias3b.id)

        # And again, but this time using the alternate value rather than
        # the primary.
        bias3b_id, _ = butler.get("bias", {"exposure": "four",
                                           "detector": "Ba"},
                                  collections="calibs", instrument="Cam1")
        self.assertEqual(bias3b_id, bias3b.id)

        # And again, but this time using the alternate value rather than
        # the primary, passed as keyword arguments.
        bias3b_id, _ = butler.get("bias",
                                  exposure="four", detector="Ba",
                                  collections="calibs", instrument="Cam1")
        self.assertEqual(bias3b_id, bias3b.id)

        # Now with implied record columns.
        bias3b_id, _ = butler.get("bias", day_obs=20211114, seq_num=42,
                                  raft="B", name_in_raft="a",
                                  collections="calibs", instrument="Cam1")
        self.assertEqual(bias3b_id, bias3b.id)

    def testRegistryDefaults(self):
        """Test that we can default the collections and some data ID keys when
        constructing a butler.

        Many tests that use the default run already exist in
        ``test_butler.py``, so that isn't tested here.  And while most of this
        functionality is implemented in `Registry`, we test it here instead of
        ``daf/butler/tests/registry.py`` because it shouldn't depend on the
        database backend at all.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.registry.findDataset("flat", detector=2, physical_filter="Cam1-G")
        # Do the same with Butler.get; this should ultimately invoke a lot of
        # the same code, so it's a bit circular, but mostly we're checking that
        # it works at all.
        dataset_id, _ = butler.get("flat", detector=2, physical_filter="Cam1-G")
        self.assertEqual(ref.id, dataset_id)
        # Query for datasets.  Test defaulting the data ID in both kwargs and
        # in the WHERE expression.
        queried_refs_1 = set(butler.registry.queryDatasets("flat", detector=2, physical_filter="Cam1-G"))
        self.assertEqual({ref}, queried_refs_1)
        queried_refs_2 = set(butler.registry.queryDatasets("flat",
                                                           where="detector=2 AND physical_filter='Cam1-G'"))
        self.assertEqual({ref}, queried_refs_2)
        # Query for data IDs with a dataset constraint.
        queried_data_ids = set(butler.registry.queryDataIds({"instrument", "detector", "physical_filter"},
                                                            datasets={"flat"},
                                                            detector=2, physical_filter="Cam1-G"))
        self.assertEqual({ref.dataId}, queried_data_ids)
        # Add another instrument to the repo, and a dataset that uses it to
        # the `imported_g` collection.
        butler.registry.insertDimensionData("instrument", {"name": "Cam2"})
        camera = DatasetType(
            "camera",
            dimensions=butler.registry.dimensions["instrument"].graph,
            storageClass="Camera",
        )
        butler.registry.registerDatasetType(camera)
        butler.registry.insertDatasets(camera, [{"instrument": "Cam2"}], run="imported_g")
        # Initialize a new butler with `imported_g` as its default run.
        # This should not have a default instrument, because there are two.
        # Pass run instead of collections; this should set both.
        butler2 = Butler(butler=butler, run="imported_g")
        self.assertEqual(list(butler2.registry.defaults.collections), ["imported_g"])
        self.assertEqual(butler2.registry.defaults.run, "imported_g")
        self.assertFalse(butler2.registry.defaults.dataId)
        # Initialize a new butler with an instrument default explicitly given.
        # Set collections instead of run, which should then be None.
        butler3 = Butler(butler=butler, collections=["imported_g"], instrument="Cam2")
        self.assertEqual(list(butler3.registry.defaults.collections), ["imported_g"])
        self.assertIsNone(butler3.registry.defaults.run)
        self.assertEqual(butler3.registry.defaults.dataId.byName(), {"instrument": "Cam2"})

    def testJson(self):
        """Test JSON serialization mediated by registry."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.registry.findDataset("flat", detector=2, physical_filter="Cam1-G")

        # Transform the ref and dataset type to and from JSON and check that
        # they can be reconstructed properly.

        # Do it with the ref and a component ref in minimal and standard form.
        compRef = ref.makeComponentRef("wcs")

        for test_item in (ref, ref.datasetType, compRef, compRef.datasetType):
            for minimal in (False, True):
                json_str = test_item.to_json(minimal=minimal)
                from_json = type(test_item).from_json(json_str, registry=butler.registry)
                self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using registry")

                # For the minimal=False case, also test without a registry.
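                # (The minimal form appears to need registry lookups to be
                # reconstructed, so only the standard form is round-tripped
                # from the dimension universe alone.)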
                if not minimal:
                    from_json = type(test_item).from_json(json_str, universe=butler.registry.dimensions)
                    self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using universe")

    def testJsonDimensionRecords(self):
        """Test JSON serialization of dimension records."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        for dimension in ("detector", "visit"):
            records = butler.registry.queryDimensionRecords(dimension, instrument="HSC")
            for r in records:
                for minimal in (True, False):
                    json_str = r.to_json(minimal=minimal)
                    r_json = type(r).from_json(json_str, registry=butler.registry)
                    self.assertEqual(r_json, r)
                    # Also check equality of each of the components as dicts.
                    self.assertEqual(r_json.toDict(), r.toDict())

if __name__ == "__main__":
    unittest.main()