Coverage for tests/test_simpleButler.py: 13%

275 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-11-29 02:00 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import json 

25import os 

26import re 

27import tempfile 

28import unittest 

29import uuid 

30from typing import Any 

31 

32try: 

33 import numpy as np 

34except ImportError: 

35 np = None 

36 

37import astropy.time 

38from lsst.daf.butler import Butler, ButlerConfig, CollectionType, DatasetRef, DatasetType, Registry, Timespan 

39from lsst.daf.butler.registry import ConflictingDefinitionError, RegistryConfig, RegistryDefaults 

40from lsst.daf.butler.tests import DatastoreMock 

41from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir 

42 

# Absolute path of the directory containing this test file; used to locate
# the YAML import files under ``data/registry``.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

44 

45 

class SimpleButlerTestCase(unittest.TestCase):
    """Tests for butler (including import/export functionality) that should not
    depend on the Registry Database backend or Datastore implementation, and
    can instead utilize an in-memory SQLite Registry and a mocked Datastore.
    """

    # Fully-qualified class name of the dataset record storage manager to
    # configure; subclasses override this to exercise the UUID-based manager.
    datasetsManager = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager"
    # Name of the YAML file (under data/registry) whose datasets are imported.
    datasetsImportFile = "datasets.yaml"
    # Python type of the dataset IDs this manager produces (int or uuid.UUID).
    datasetsIdType = int

55 

    def setUp(self):
        # Fresh temporary root directory per test; removed in tearDown.
        self.root = makeTestTempDir(TESTDIR)

58 

    def tearDown(self):
        # Remove the per-test temporary directory created in setUp.
        removeTestTempDir(self.root)

61 

    def makeButler(self, **kwargs: Any) -> Butler:
        """Return new Butler instance on each call.

        Parameters
        ----------
        **kwargs : `Any`
            Forwarded unchanged to the `Butler` constructor
            (e.g. ``writeable=True``).

        Returns
        -------
        butler : `Butler`
            Butler backed by a fresh SQLite registry in its own temporary
            directory, with datastore operations replaced by `DatastoreMock`.
        """
        config = ButlerConfig()

        # make separate temporary directory for registry of this instance
        tmpdir = tempfile.mkdtemp(dir=self.root)
        config["registry", "db"] = f"sqlite:///{tmpdir}/gen3.sqlite3"
        config["registry", "managers", "datasets"] = self.datasetsManager
        config["root"] = self.root

        # have to make a registry first
        registryConfig = RegistryConfig(config.get("registry"))
        if self.datasetsIdType is int:
            # Integer-ID managers emit a FutureWarning at creation time —
            # presumably because they are deprecated in favor of UUIDs;
            # assert that the warning actually fires.
            with self.assertWarns(FutureWarning):
                Registry.createFromConfig(registryConfig)
        else:
            Registry.createFromConfig(registryConfig)

        butler = Butler(config, **kwargs)
        # Mock out the datastore so tests never do real file I/O; with the
        # mock applied, butler.get returns (dataset_id, ...) tuples that the
        # tests below unpack.
        DatastoreMock.apply(butler)
        return butler

83 

84 def comparableRef(self, ref: DatasetRef) -> DatasetRef: 

85 """Return a DatasetRef that can be compared to a DatasetRef from 

86 other repository. 

87 

88 For repositories that do not support round-trip of ID values this 

89 method returns unresolved DatasetRef, for round-trip-safe repos it 

90 returns unchanged ref. 

91 """ 

92 return ref if self.datasetsIdType is uuid.UUID else ref.unresolved() 

93 

    def testReadBackwardsCompatibility(self):
        """Test that we can read an export file written by a previous version
        and commit to the daf_butler git repo.

        Notes
        -----
        At present this export file includes only dimension data, not datasets,
        which greatly limits the usefulness of this test.  We should address
        this at some point, but I think it's best to wait for the changes to
        the export format required for CALIBRATION collections to land.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Spot-check a few things, but the most important test is just that
        # the above does not raise.
        self.assertGreaterEqual(
            set(record.id for record in butler.registry.queryDimensionRecords("detector", instrument="HSC")),
            set(range(104)),  # should have all science CCDs; may have some focus ones.
        )
        # Expected (visit, filter) pairs must be a subset of what was read.
        self.assertGreaterEqual(
            {
                (record.id, record.physical_filter)
                for record in butler.registry.queryDimensionRecords("visit", instrument="HSC")
            },
            {
                (27136, "HSC-Z"),
                (11694, "HSC-G"),
                (23910, "HSC-R"),
                (11720, "HSC-Y"),
                (23900, "HSC-R"),
                (22646, "HSC-Y"),
                (1248, "HSC-I"),
                (19680, "HSC-I"),
                (1240, "HSC-I"),
                (424, "HSC-Y"),
                (19658, "HSC-I"),
                (344, "HSC-Y"),
                (1218, "HSC-R"),
                (1190, "HSC-Z"),
                (23718, "HSC-R"),
                (11700, "HSC-G"),
                (26036, "HSC-G"),
                (23872, "HSC-R"),
                (1170, "HSC-Z"),
                (1876, "HSC-Y"),
            },
        )

141 

    def testDatasetTransfers(self):
        """Test exporting all datasets from a repo and then importing them all
        back in again.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
            # Import it all again.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
        datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
        # IDs in both repositories must be of this test class's ID type.
        self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets1))
        self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets2))
        # Compare refs via comparableRef, which drops IDs when the ID type
        # does not round-trip through export/import.
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in datasets1],
            [self.comparableRef(ref) for ref in datasets2],
        )

165 

166 def testImportTwice(self): 

167 """Test exporting dimension records and datasets from a repo and then 

168 importing them all back in again twice. 

169 """ 

170 if self.datasetsIdType is not uuid.UUID: 

171 self.skipTest("This test can only work for UUIDs") 

172 # Import data to play with. 

173 butler1 = self.makeButler(writeable=True) 

174 butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

175 butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile)) 

176 with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as file: 

177 # Export all datasets. 

178 with butler1.export(filename=file.name) as exporter: 

179 exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...)) 

180 butler2 = self.makeButler(writeable=True) 

181 # Import it once. 

182 butler2.import_(filename=file.name) 

183 # Import it again 

184 butler2.import_(filename=file.name) 

185 datasets1 = list(butler1.registry.queryDatasets(..., collections=...)) 

186 datasets2 = list(butler2.registry.queryDatasets(..., collections=...)) 

187 self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets1)) 

188 self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets2)) 

189 self.assertCountEqual( 

190 [self.comparableRef(ref) for ref in datasets1], 

191 [self.comparableRef(ref) for ref in datasets2], 

192 ) 

193 

194 def testDatasetImportReuseIds(self): 

195 """Test for import that should preserve dataset IDs. 

196 

197 This test assumes that dataset IDs in datasets YAML are different from 

198 what auto-incremental insert would produce. 

199 """ 

200 if self.datasetsIdType is not int: 

201 self.skipTest("This test can only work for UUIDs") 

202 # Import data to play with. 

203 butler = self.makeButler(writeable=True) 

204 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

205 filename = os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile) 

206 butler.import_(filename=filename, reuseIds=True) 

207 datasets = list(butler.registry.queryDatasets(..., collections=...)) 

208 self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets)) 

209 # IDs are copied from YAML, list needs to be updated if file contents 

210 # is changed. 

211 self.assertCountEqual( 

212 [ref.id for ref in datasets], 

213 [1001, 1002, 1003, 1010, 1020, 1030, 2001, 2002, 2003, 2010, 2020, 2030, 2040], 

214 ) 

215 

216 # Try once again, it will raise 

217 with self.assertRaises(ConflictingDefinitionError): 

218 butler.import_(filename=filename, reuseIds=True) 

219 

    def testCollectionTransfers(self):
        """Test exporting and then importing collections of various types."""
        # Populate a registry with some datasets.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        registry1 = butler1.registry
        # Add some more collections.
        registry1.registerRun("run1")
        registry1.registerCollection("tag1", CollectionType.TAGGED)
        registry1.registerCollection("calibration1", CollectionType.CALIBRATION)
        registry1.registerCollection("chain1", CollectionType.CHAINED)
        registry1.registerCollection("chain2", CollectionType.CHAINED)
        registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
        registry1.setCollectionChain("chain2", ["calibration1", "run1"])
        # Associate some datasets into the TAGGED and CALIBRATION collections.
        flats1 = list(registry1.queryDatasets("flat", collections=...))
        registry1.associate("tag1", flats1)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias1a = registry1.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g")
        bias2a = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Certify biases with various (possibly unbounded or empty) validity
        # ranges so the export covers the interesting timespan cases.
        registry1.certify("calibration1", [bias2a, bias3a], Timespan(t1, t2))
        registry1.certify("calibration1", [bias2b], Timespan(t2, None))
        registry1.certify("calibration1", [bias3b], Timespan(t2, t3))
        registry1.certify("calibration1", [bias1a], Timespan.makeEmpty())

        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all collections, and some datasets.
            with butler1.export(filename=file.name) as exporter:
                # Sort results to put chain1 before chain2, which is
                # intentionally not topological order.
                for collection in sorted(registry1.queryCollections()):
                    exporter.saveCollection(collection)
                exporter.saveDatasets(flats1)
                exporter.saveDatasets([bias1a, bias2a, bias2b, bias3a, bias3b])
            # Import them into a new registry.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        registry2 = butler2.registry
        # Check that it all round-tripped, starting with the collections
        # themselves.
        self.assertIs(registry2.getCollectionType("run1"), CollectionType.RUN)
        self.assertIs(registry2.getCollectionType("tag1"), CollectionType.TAGGED)
        self.assertIs(registry2.getCollectionType("calibration1"), CollectionType.CALIBRATION)
        self.assertIs(registry2.getCollectionType("chain1"), CollectionType.CHAINED)
        self.assertIs(registry2.getCollectionType("chain2"), CollectionType.CHAINED)
        self.assertEqual(
            list(registry2.getCollectionChain("chain1")),
            ["tag1", "run1", "chain2"],
        )
        self.assertEqual(
            list(registry2.getCollectionChain("chain2")),
            ["calibration1", "run1"],
        )
        # Check that tag collection contents are the same.
        self.maxDiff = None
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in registry1.queryDatasets(..., collections="tag1")],
            [self.comparableRef(ref) for ref in registry2.queryDatasets(..., collections="tag1")],
        )
        # Check that calibration collection contents are the same.
        self.assertCountEqual(
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry1.queryDatasetAssociations("bias", collections="calibration1")
            ],
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry2.queryDatasetAssociations("bias", collections="calibration1")
            ],
        )

296 

297 def testButlerGet(self): 

298 """Test that butler.get can work with different variants.""" 

299 

300 # Import data to play with. 

301 butler = self.makeButler(writeable=True) 

302 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

303 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile)) 

304 

305 # Find the DatasetRef for a flat 

306 coll = "imported_g" 

307 flat2g = butler.registry.findDataset( 

308 "flat", instrument="Cam1", detector=2, physical_filter="Cam1-G", collections=coll 

309 ) 

310 

311 # Create a numpy integer to check that works fine 

312 detector_np = np.int64(2) if np else 2 

313 print(type(detector_np)) 

314 

315 # Try to get it using different variations of dataId + keyword 

316 # arguments 

317 # Note that instrument.class_name does not work 

318 variants = ( 

319 (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}), 

320 (None, {"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}), 

321 ({"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}, {}), 

322 ({"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}, {}), 

323 ({"instrument": "Cam1", "detector": 2}, {"physical_filter": "Cam1-G"}), 

324 ({"detector.full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}), 

325 ({"full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}), 

326 (None, {"full_name": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}), 

327 (None, {"detector": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}), 

328 ({"name_in_raft": "b", "raft": "A"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}), 

329 ({"name_in_raft": "b"}, {"raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}), 

330 (None, {"name_in_raft": "b", "raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}), 

331 ( 

332 {"detector.name_in_raft": "b", "detector.raft": "A"}, 

333 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, 

334 ), 

335 ( 

336 { 

337 "detector.name_in_raft": "b", 

338 "detector.raft": "A", 

339 "instrument": "Cam1", 

340 "physical_filter": "Cam1-G", 

341 }, 

342 {}, 

343 ), 

344 # Duplicate (but valid) information. 

345 (None, {"instrument": "Cam1", "detector": 2, "raft": "A", "physical_filter": "Cam1-G"}), 

346 ({"detector": 2}, {"instrument": "Cam1", "raft": "A", "physical_filter": "Cam1-G"}), 

347 ({"raft": "A"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}), 

348 ({"raft": "A"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}), 

349 ) 

350 

351 for dataId, kwds in variants: 

352 try: 

353 flat_id, _ = butler.get("flat", dataId=dataId, collections=coll, **kwds) 

354 except Exception as e: 

355 raise type(e)(f"{str(e)}: dataId={dataId}, kwds={kwds}") from e 

356 self.assertEqual(flat_id, flat2g.id, msg=f"DataId: {dataId}, kwds: {kwds}") 

357 

358 # Check that bad combinations raise. 

359 variants = ( 

360 # Inconsistent detector information. 

361 (None, {"instrument": "Cam1", "detector": 2, "raft": "B", "physical_filter": "Cam1-G"}), 

362 ({"detector": 2}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}), 

363 ({"detector": 12}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}), 

364 ({"raft": "B"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}), 

365 ({"raft": "B"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}), 

366 # Under-specified. 

367 ({"raft": "B"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}), 

368 # Spurious kwargs. 

369 (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G", "x": "y"}), 

370 ({"x": "y"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}), 

371 ) 

372 for dataId, kwds in variants: 

373 with self.assertRaises((ValueError, LookupError)): 

374 butler.get("flat", dataId=dataId, collections=coll, **kwds) 

375 

    def testGetCalibration(self):
        """Test that `Butler.get` can be used to fetch from
        `~CollectionType.CALIBRATION` collections if the data ID includes
        extra dimensions with temporal information.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Certify some biases into a CALIBRATION collection.
        registry = butler.registry
        registry.registerCollection("calibs", CollectionType.CALIBRATION)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry.certify("calibs", [bias2a, bias3a], Timespan(t1, t2))
        registry.certify("calibs", [bias2b], Timespan(t2, None))
        registry.certify("calibs", [bias3b], Timespan(t2, t3))
        # Insert some exposure dimension data: exposure 3 falls in [t1, t2),
        # exposure 4 in [t2, t3), so each selects a different certified bias.
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 3,
                "obs_id": "three",
                "timespan": Timespan(t1, t2),
                "physical_filter": "Cam1-G",
                "day_obs": 20201114,
                "seq_num": 55,
            },
            {
                "instrument": "Cam1",
                "id": 4,
                "obs_id": "four",
                "timespan": Timespan(t2, t3),
                "physical_filter": "Cam1-G",
                "day_obs": 20211114,
                "seq_num": 42,
            },
        )
        # Get some biases from raw-like data IDs.
        bias2a_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 3, "detector": 2}, collections="calibs"
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 4, "detector": 3}, collections="calibs"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Get using the kwarg form
        bias3b_id, _ = butler.get("bias", instrument="Cam1", exposure=4, detector=3, collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Do it again but using the record information
        bias2a_id, _ = butler.get(
            "bias",
            {"instrument": "Cam1", "exposure.obs_id": "three", "detector.full_name": "Ab"},
            collections="calibs",
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias",
            {"exposure.obs_id": "four", "detector.full_name": "Ba"},
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary.
        bias3b_id, _ = butler.get(
            "bias", {"exposure": "four", "detector": "Ba"}, collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary and do it in the keyword arguments.
        bias3b_id, _ = butler.get(
            "bias", exposure="four", detector="Ba", collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Now with implied record columns
        bias3b_id, _ = butler.get(
            "bias",
            day_obs=20211114,
            seq_num=42,
            raft="B",
            name_in_raft="a",
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Allow a fully-specified dataId and unnecessary extra information
        # that comes from the record.
        bias3b_id, _ = butler.get(
            "bias",
            dataId=dict(
                exposure=4,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                instrument="Cam1",
            ),
            collections="calibs",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Extra but inconsistent record values are a problem.
        with self.assertRaises(ValueError):
            bias3b_id, _ = butler.get(
                "bias",
                exposure=3,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                collections="calibs",
                instrument="Cam1",
            )

        # Ensure that spurious kwargs cause an exception.
        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                {"exposure.obs_id": "four", "immediate": True, "detector.full_name": "Ba"},
                collections="calibs",
                instrument="Cam1",
            )

        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                day_obs=20211114,
                seq_num=42,
                raft="B",
                name_in_raft="a",
                collections="calibs",
                instrument="Cam1",
                immediate=True,
            )

522 

523 def testRegistryDefaults(self): 

524 """Test that we can default the collections and some data ID keys when 

525 constructing a butler. 

526 

527 Many tests that use default run already exist in ``test_butler.py``, so 

528 that isn't tested here. And while most of this functionality is 

529 implemented in `Registry`, we test it here instead of 

530 ``daf/butler/tests/registry.py`` because it shouldn't depend on the 

531 database backend at all. 

532 """ 

533 butler = self.makeButler(writeable=True) 

534 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

535 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile)) 

536 # Need to actually set defaults later, not at construction, because 

537 # we need to import the instrument before we can use it as a default. 

538 # Don't set a default instrument value for data IDs, because 'Cam1' 

539 # should be inferred by virtue of that being the only value in the 

540 # input collections. 

541 butler.registry.defaults = RegistryDefaults(collections=["imported_g"]) 

542 # Use findDataset without collections or instrument. 

543 ref = butler.registry.findDataset("flat", detector=2, physical_filter="Cam1-G") 

544 # Do the same with Butler.get; this should ultimately invoke a lot of 

545 # the same code, so it's a bit circular, but mostly we're checking that 

546 # it works at all. 

547 dataset_id, _ = butler.get("flat", detector=2, physical_filter="Cam1-G") 

548 self.assertEqual(ref.id, dataset_id) 

549 # Query for datasets. Test defaulting the data ID in both kwargs and 

550 # in the WHERE expression. 

551 queried_refs_1 = set(butler.registry.queryDatasets("flat", detector=2, physical_filter="Cam1-G")) 

552 self.assertEqual({ref}, queried_refs_1) 

553 queried_refs_2 = set( 

554 butler.registry.queryDatasets("flat", where="detector=2 AND physical_filter='Cam1-G'") 

555 ) 

556 self.assertEqual({ref}, queried_refs_2) 

557 # Query for data IDs with a dataset constraint. 

558 queried_data_ids = set( 

559 butler.registry.queryDataIds( 

560 {"instrument", "detector", "physical_filter"}, 

561 datasets={"flat"}, 

562 detector=2, 

563 physical_filter="Cam1-G", 

564 ) 

565 ) 

566 self.assertEqual({ref.dataId}, queried_data_ids) 

567 # Add another instrument to the repo, and a dataset that uses it to 

568 # the `imported_g` collection. 

569 butler.registry.insertDimensionData("instrument", {"name": "Cam2"}) 

570 camera = DatasetType( 

571 "camera", 

572 dimensions=butler.registry.dimensions["instrument"].graph, 

573 storageClass="Camera", 

574 ) 

575 butler.registry.registerDatasetType(camera) 

576 butler.registry.insertDatasets(camera, [{"instrument": "Cam2"}], run="imported_g") 

577 # Initialize a new butler with `imported_g` as its default run. 

578 # This should not have a default instrument, because there are two. 

579 # Pass run instead of collections; this should set both. 

580 butler2 = Butler(butler=butler, run="imported_g") 

581 self.assertEqual(list(butler2.registry.defaults.collections), ["imported_g"]) 

582 self.assertEqual(butler2.registry.defaults.run, "imported_g") 

583 self.assertFalse(butler2.registry.defaults.dataId) 

584 # Initialize a new butler with an instrument default explicitly given. 

585 # Set collections instead of run, which should then be None. 

586 butler3 = Butler(butler=butler, collections=["imported_g"], instrument="Cam2") 

587 self.assertEqual(list(butler3.registry.defaults.collections), ["imported_g"]) 

588 self.assertIsNone(butler3.registry.defaults.run, None) 

589 self.assertEqual(butler3.registry.defaults.dataId.byName(), {"instrument": "Cam2"}) 

590 

    def testJson(self):
        """Test JSON serialization mediated by registry."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.registry.findDataset("flat", detector=2, physical_filter="Cam1-G")

        # Transform the ref and dataset type to and from JSON
        # and check that it can be reconstructed properly

        # Do it with the ref and a component ref in minimal and standard form
        compRef = ref.makeComponentRef("wcs")

        for test_item in (ref, ref.datasetType, compRef, compRef.datasetType):
            for minimal in (False, True):
                json_str = test_item.to_json(minimal=minimal)
                from_json = type(test_item).from_json(json_str, registry=butler.registry)
                self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using registry")

                # for minimal=False case also do a test without registry;
                # the non-minimal form carries enough information that only
                # the dimension universe is needed to reconstruct it.
                if not minimal:
                    from_json = type(test_item).from_json(json_str, universe=butler.registry.dimensions)
                    self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using universe")

621 

622 def testJsonDimensionRecordsAndHtmlRepresentation(self): 

623 # Dimension Records 

624 butler = self.makeButler(writeable=True) 

625 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml")) 

626 

627 for dimension in ("detector", "visit"): 

628 records = butler.registry.queryDimensionRecords(dimension, instrument="HSC") 

629 for r in records: 

630 for minimal in (True, False): 

631 json_str = r.to_json(minimal=minimal) 

632 r_json = type(r).from_json(json_str, registry=butler.registry) 

633 self.assertEqual(r_json, r) 

634 # check with direct method 

635 simple = r.to_simple() 

636 fromDirect = type(simple).direct(**json.loads(json_str)) 

637 self.assertEqual(simple, fromDirect) 

638 # Also check equality of each of the components as dicts 

639 self.assertEqual(r_json.toDict(), r.toDict()) 

640 

641 # check the html representation of records 

642 r_html = r._repr_html_() 

643 self.assertTrue(isinstance(r_html, str)) 

644 self.assertIn(dimension, r_html) 

645 

    def testWildcardQueries(self):
        """Test that different collection type queries work."""

        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Create some collections
        created = {"collection", "u/user/test", "coll3"}
        for collection in created:
            butler.registry.registerCollection(collection, type=CollectionType.RUN)

        collections = butler.registry.queryCollections()
        self.assertEqual(set(collections), created)

        # Each entry pairs a collection expression (literal name, ellipsis,
        # glob string, compiled regex, or tuple of these) with the set of
        # collection names it is expected to match.
        expressions = (
            ("collection", {"collection"}),
            (..., created),
            ("*", created),
            (("collection", "*"), created),
            ("u/*", {"u/user/test"}),
            (re.compile("u.*"), {"u/user/test"}),
            (re.compile(".*oll.*"), {"collection", "coll3"}),
            ("*oll*", {"collection", "coll3"}),
            ((re.compile(r".*\d$"), "u/user/test"), {"coll3", "u/user/test"}),
            ("*[0-9]", {"coll3"}),
        )
        for expression, expected in expressions:
            result = butler.registry.queryCollections(expression)
            self.assertEqual(set(result), expected)

676 

677 

class SimpleButlerUUIDTestCase(SimpleButlerTestCase):
    """Same as SimpleButlerTestCase but uses UUID-based datasets manager and
    loads datasets from YAML file with UUIDs.
    """

    # UUID-based manager; overrides the int-based default in the base class.
    datasetsManager = (
        "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
    )
    # Import file whose dataset IDs are already UUIDs.
    datasetsImportFile = "datasets-uuid.yaml"
    datasetsIdType = uuid.UUID

688 

689 

class SimpleButlerMixedUUIDTestCase(SimpleButlerTestCase):
    """Same as SimpleButlerTestCase but uses UUID-based datasets manager and
    loads datasets from YAML file with integer IDs.
    """

    # UUID-based manager fed from the integer-ID import file: exercises the
    # mixed case where IDs from YAML cannot be reused as-is.
    datasetsManager = (
        "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
    )
    datasetsImportFile = "datasets.yaml"
    datasetsIdType = uuid.UUID

700 

701 

# Standard unittest entry point when the file is run as a script.
if __name__ == "__main__":
    unittest.main()