# Coverage-report provenance (coverage.py v7.2.7, created 2023-07-12 10:56 -0700):
# tests/test_simpleButler.py — 11% of 256 statements covered.

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import json 

25import os 

26import re 

27import tempfile 

28import unittest 

29from typing import Any 

30 

31try: 

32 import numpy as np 

33except ImportError: 

34 np = None 

35 

36import astropy.time 

37from lsst.daf.butler import ( 

38 Butler, 

39 ButlerConfig, 

40 CollectionType, 

41 DatasetId, 

42 DatasetRef, 

43 DatasetType, 

44 Registry, 

45 Timespan, 

46) 

47from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults 

48from lsst.daf.butler.tests import DatastoreMock 

49from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir 

50 

51TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

52 

53 

class SimpleButlerTestCase(unittest.TestCase):
    """Tests for butler (including import/export functionality) that should not
    depend on the Registry Database backend or Datastore implementation, and
    can instead utilize an in-memory SQLite Registry and a mocked Datastore.
    """

    # Fully-qualified class name of the dataset-record storage manager the
    # test repositories are configured with (UUID-based dataset IDs).
    datasetsManager = (
        "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
    )
    # Name of the YAML export file (under data/registry) used to seed
    # datasets; subclasses override this to exercise other ID schemes.
    datasetsImportFile = "datasets-uuid.yaml"

    def setUp(self):
        # Fresh scratch directory per test; removed in tearDown.
        self.root = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.root)

    def makeButler(self, **kwargs: Any) -> Butler:
        """Return new Butler instance on each call.

        Parameters
        ----------
        **kwargs
            Forwarded to the `Butler` constructor (e.g. ``writeable=True``).

        Returns
        -------
        butler : `Butler`
            A writeable-or-not Butler backed by a private SQLite registry and
            a mocked datastore (so no real file I/O for datasets occurs).
        """
        config = ButlerConfig()

        # make separate temporary directory for registry of this instance
        tmpdir = tempfile.mkdtemp(dir=self.root)
        config["registry", "db"] = f"sqlite:///{tmpdir}/gen3.sqlite3"
        config["registry", "managers", "datasets"] = self.datasetsManager
        config["root"] = self.root

        # have to make a registry first
        registryConfig = RegistryConfig(config.get("registry"))
        Registry.createFromConfig(registryConfig)

        butler = Butler(config, **kwargs)
        # Replace datastore operations with in-memory mocks; Butler.get then
        # returns (dataset_id, component) tuples rather than real datasets.
        DatastoreMock.apply(butler)
        return butler

    def comparableRef(self, ref: DatasetRef) -> DatasetRef:
        """Return a DatasetRef that can be compared to a DatasetRef from
        other repository.

        For repositories that do not support round-trip of ID values this
        method returns unresolved DatasetRef, for round-trip-safe repos it
        returns unchanged ref.
        """
        return ref

    def testReadBackwardsCompatibility(self):
        """Test that we can read an export file written by a previous version
        and commit to the daf_butler git repo.

        Notes
        -----
        At present this export file includes only dimension data, not datasets,
        which greatly limits the usefulness of this test.  We should address
        this at some point, but I think it's best to wait for the changes to
        the export format required for CALIBRATION collections to land.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Spot-check a few things, but the most important test is just that
        # the above does not raise.
        self.assertGreaterEqual(
            {record.id for record in butler.registry.queryDimensionRecords("detector", instrument="HSC")},
            set(range(104)),  # should have all science CCDs; may have some focus ones.
        )
        self.assertGreaterEqual(
            {
                (record.id, record.physical_filter)
                for record in butler.registry.queryDimensionRecords("visit", instrument="HSC")
            },
            {
                (27136, "HSC-Z"),
                (11694, "HSC-G"),
                (23910, "HSC-R"),
                (11720, "HSC-Y"),
                (23900, "HSC-R"),
                (22646, "HSC-Y"),
                (1248, "HSC-I"),
                (19680, "HSC-I"),
                (1240, "HSC-I"),
                (424, "HSC-Y"),
                (19658, "HSC-I"),
                (344, "HSC-Y"),
                (1218, "HSC-R"),
                (1190, "HSC-Z"),
                (23718, "HSC-R"),
                (11700, "HSC-G"),
                (26036, "HSC-G"),
                (23872, "HSC-R"),
                (1170, "HSC-Z"),
                (1876, "HSC-Y"),
            },
        )

    def testDatasetTransfers(self):
        """Test exporting all datasets from a repo and then importing them all
        back in again.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
            # Import it all again.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
        datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets1))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets2))
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in datasets1],
            [self.comparableRef(ref) for ref in datasets2],
        )

    def testImportTwice(self):
        """Test exporting dimension records and datasets from a repo and then
        importing them all back in again twice.

        A second import of the same file must be idempotent (no duplicate
        datasets, no errors).
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # NOTE(review): delete=False intentionally diverges from
        # testDatasetTransfers (presumably for re-open semantics); the file is
        # left behind in the temp dir — confirm whether cleanup is needed.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
            butler2 = self.makeButler(writeable=True)
            # Import it once.
            butler2.import_(filename=file.name)
            # Import it again
            butler2.import_(filename=file.name)
        datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
        datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets1))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets2))
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in datasets1],
            [self.comparableRef(ref) for ref in datasets2],
        )

    def testCollectionTransfers(self):
        """Test exporting and then importing collections of various types."""
        # Populate a registry with some datasets.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        registry1 = butler1.registry
        # Add some more collections.
        registry1.registerRun("run1")
        registry1.registerCollection("tag1", CollectionType.TAGGED)
        registry1.registerCollection("calibration1", CollectionType.CALIBRATION)
        registry1.registerCollection("chain1", CollectionType.CHAINED)
        registry1.registerCollection("chain2", CollectionType.CHAINED)
        registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
        registry1.setCollectionChain("chain2", ["calibration1", "run1"])
        # Associate some datasets into the TAGGED and CALIBRATION collections.
        flats1 = list(registry1.queryDatasets("flat", collections=...))
        registry1.associate("tag1", flats1)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias1a = registry1.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g")
        bias2a = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry1.certify("calibration1", [bias2a, bias3a], Timespan(t1, t2))
        registry1.certify("calibration1", [bias2b], Timespan(t2, None))
        registry1.certify("calibration1", [bias3b], Timespan(t2, t3))
        registry1.certify("calibration1", [bias1a], Timespan.makeEmpty())

        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all collections, and some datasets.
            with butler1.export(filename=file.name) as exporter:
                # Sort results to put chain1 before chain2, which is
                # intentionally not topological order.
                for collection in sorted(registry1.queryCollections()):
                    exporter.saveCollection(collection)
                exporter.saveDatasets(flats1)
                exporter.saveDatasets([bias1a, bias2a, bias2b, bias3a, bias3b])
            # Import them into a new registry.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        registry2 = butler2.registry
        # Check that it all round-tripped, starting with the collections
        # themselves.
        self.assertIs(registry2.getCollectionType("run1"), CollectionType.RUN)
        self.assertIs(registry2.getCollectionType("tag1"), CollectionType.TAGGED)
        self.assertIs(registry2.getCollectionType("calibration1"), CollectionType.CALIBRATION)
        self.assertIs(registry2.getCollectionType("chain1"), CollectionType.CHAINED)
        self.assertIs(registry2.getCollectionType("chain2"), CollectionType.CHAINED)
        self.assertEqual(
            list(registry2.getCollectionChain("chain1")),
            ["tag1", "run1", "chain2"],
        )
        self.assertEqual(
            list(registry2.getCollectionChain("chain2")),
            ["calibration1", "run1"],
        )
        # Check that tag collection contents are the same.
        self.maxDiff = None
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in registry1.queryDatasets(..., collections="tag1")],
            [self.comparableRef(ref) for ref in registry2.queryDatasets(..., collections="tag1")],
        )
        # Check that calibration collection contents are the same.
        self.assertCountEqual(
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry1.queryDatasetAssociations("bias", collections="calibration1")
            ],
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry2.queryDatasetAssociations("bias", collections="calibration1")
            ],
        )

    def testButlerGet(self):
        """Test that butler.get can work with different variants."""
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))

        # Find the DatasetRef for a flat
        coll = "imported_g"
        flat2g = butler.registry.findDataset(
            "flat", instrument="Cam1", detector=2, physical_filter="Cam1-G", collections=coll
        )

        # Create a numpy integer to check that works fine
        detector_np = np.int64(2) if np else 2

        # Try to get it using different variations of dataId + keyword
        # arguments
        # Note that instrument.class_name does not work
        variants = (
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            (None, {"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}),
            ({"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": 2}, {"physical_filter": "Cam1-G"}),
            ({"detector.full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"full_name": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"detector": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b", "raft": "A"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b"}, {"raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"name_in_raft": "b", "raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (
                {"detector.name_in_raft": "b", "detector.raft": "A"},
                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
            ),
            (
                {
                    "detector.name_in_raft": "b",
                    "detector.raft": "A",
                    "instrument": "Cam1",
                    "physical_filter": "Cam1-G",
                },
                {},
            ),
            # Duplicate (but valid) information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "A", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "A", "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
        )

        for dataId, kwds in variants:
            try:
                flat_id, _ = butler.get("flat", dataId=dataId, collections=coll, **kwds)
            except Exception as e:
                # Re-raise with the failing variant appended so the message
                # identifies which combination broke.
                raise type(e)(f"{str(e)}: dataId={dataId}, kwds={kwds}") from e
            self.assertEqual(flat_id, flat2g.id, msg=f"DataId: {dataId}, kwds: {kwds}")

        # Check that bad combinations raise.
        variants = (
            # Inconsistent detector information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "B", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
            ({"detector": 12}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
            ({"raft": "B"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "B"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
            # Under-specified.
            ({"raft": "B"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            # Spurious kwargs.
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G", "x": "y"}),
            ({"x": "y"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
        )
        for dataId, kwds in variants:
            with self.assertRaises((ValueError, LookupError)):
                butler.get("flat", dataId=dataId, collections=coll, **kwds)

    def testGetCalibration(self):
        """Test that `Butler.get` can be used to fetch from
        `~CollectionType.CALIBRATION` collections if the data ID includes
        extra dimensions with temporal information.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Certify some biases into a CALIBRATION collection.
        registry = butler.registry
        registry.registerCollection("calibs", CollectionType.CALIBRATION)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry.certify("calibs", [bias2a, bias3a], Timespan(t1, t2))
        registry.certify("calibs", [bias2b], Timespan(t2, None))
        registry.certify("calibs", [bias3b], Timespan(t2, t3))
        # Insert some exposure dimension data.
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 3,
                "obs_id": "three",
                "timespan": Timespan(t1, t2),
                "physical_filter": "Cam1-G",
                "day_obs": 20201114,
                "seq_num": 55,
            },
            {
                "instrument": "Cam1",
                "id": 4,
                "obs_id": "four",
                "timespan": Timespan(t2, t3),
                "physical_filter": "Cam1-G",
                "day_obs": 20211114,
                "seq_num": 42,
            },
        )
        # Get some biases from raw-like data IDs.
        bias2a_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 3, "detector": 2}, collections="calibs"
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 4, "detector": 3}, collections="calibs"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Get using the kwarg form
        bias3b_id, _ = butler.get("bias", instrument="Cam1", exposure=4, detector=3, collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Do it again but using the record information
        bias2a_id, _ = butler.get(
            "bias",
            {"instrument": "Cam1", "exposure.obs_id": "three", "detector.full_name": "Ab"},
            collections="calibs",
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias",
            {"exposure.obs_id": "four", "detector.full_name": "Ba"},
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary.
        bias3b_id, _ = butler.get(
            "bias", {"exposure": "four", "detector": "Ba"}, collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary and do it in the keyword arguments.
        bias3b_id, _ = butler.get(
            "bias", exposure="four", detector="Ba", collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Now with implied record columns
        bias3b_id, _ = butler.get(
            "bias",
            day_obs=20211114,
            seq_num=42,
            raft="B",
            name_in_raft="a",
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Allow a fully-specified dataId and unnecessary extra information
        # that comes from the record.
        bias3b_id, _ = butler.get(
            "bias",
            dataId=dict(
                exposure=4,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                instrument="Cam1",
            ),
            collections="calibs",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Extra but inconsistent record values are a problem.
        with self.assertRaises(ValueError):
            bias3b_id, _ = butler.get(
                "bias",
                exposure=3,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                collections="calibs",
                instrument="Cam1",
            )

        # Ensure that spurious kwargs cause an exception.
        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                {"exposure.obs_id": "four", "immediate": True, "detector.full_name": "Ba"},
                collections="calibs",
                instrument="Cam1",
            )

        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                day_obs=20211114,
                seq_num=42,
                raft="B",
                name_in_raft="a",
                collections="calibs",
                instrument="Cam1",
                immediate=True,
            )

    def testRegistryDefaults(self):
        """Test that we can default the collections and some data ID keys when
        constructing a butler.

        Many tests that use default run already exist in ``test_butler.py``, so
        that isn't tested here.  And while most of this functionality is
        implemented in `Registry`, we test it here instead of
        ``daf/butler/tests/registry.py`` because it shouldn't depend on the
        database backend at all.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.registry.findDataset("flat", detector=2, physical_filter="Cam1-G")
        # Do the same with Butler.get; this should ultimately invoke a lot of
        # the same code, so it's a bit circular, but mostly we're checking that
        # it works at all.
        dataset_id, _ = butler.get("flat", detector=2, physical_filter="Cam1-G")
        self.assertEqual(ref.id, dataset_id)
        # Query for datasets.  Test defaulting the data ID in both kwargs and
        # in the WHERE expression.
        queried_refs_1 = set(butler.registry.queryDatasets("flat", detector=2, physical_filter="Cam1-G"))
        self.assertEqual({ref}, queried_refs_1)
        queried_refs_2 = set(
            butler.registry.queryDatasets("flat", where="detector=2 AND physical_filter='Cam1-G'")
        )
        self.assertEqual({ref}, queried_refs_2)
        # Query for data IDs with a dataset constraint.
        queried_data_ids = set(
            butler.registry.queryDataIds(
                {"instrument", "detector", "physical_filter"},
                datasets={"flat"},
                detector=2,
                physical_filter="Cam1-G",
            )
        )
        self.assertEqual({ref.dataId}, queried_data_ids)
        # Add another instrument to the repo, and a dataset that uses it to
        # the `imported_g` collection.
        butler.registry.insertDimensionData("instrument", {"name": "Cam2"})
        camera = DatasetType(
            "camera",
            dimensions=butler.dimensions["instrument"].graph,
            storageClass="Camera",
        )
        butler.registry.registerDatasetType(camera)
        butler.registry.insertDatasets(camera, [{"instrument": "Cam2"}], run="imported_g")
        # Initialize a new butler with `imported_g` as its default run.
        # This should not have a default instrument, because there are two.
        # Pass run instead of collections; this should set both.
        butler2 = Butler(butler=butler, run="imported_g")
        self.assertEqual(list(butler2.registry.defaults.collections), ["imported_g"])
        self.assertEqual(butler2.registry.defaults.run, "imported_g")
        self.assertFalse(butler2.registry.defaults.dataId)
        # Initialize a new butler with an instrument default explicitly given.
        # Set collections instead of run, which should then be None.
        butler3 = Butler(butler=butler, collections=["imported_g"], instrument="Cam2")
        self.assertEqual(list(butler3.registry.defaults.collections), ["imported_g"])
        # Fixed: the second positional argument of assertIsNone is ``msg``;
        # the original passed a useless ``None`` message here.
        self.assertIsNone(butler3.registry.defaults.run)
        self.assertEqual(butler3.registry.defaults.dataId.byName(), {"instrument": "Cam2"})

        # Check that repr() does not fail.
        defaults = RegistryDefaults(collections=["imported_g"], run="test")
        r = repr(defaults)
        self.assertIn("collections=('imported_g',)", r)
        self.assertIn("run='test'", r)

        defaults = RegistryDefaults(run="test", instrument="DummyCam", skypix="pix")
        r = repr(defaults)
        self.assertIn("skypix='pix'", r)
        self.assertIn("instrument='DummyCam'", r)

    def testJson(self):
        """Test JSON serialization mediated by registry."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.registry.findDataset("flat", detector=2, physical_filter="Cam1-G")

        # Transform the ref and dataset type to and from JSON
        # and check that it can be reconstructed properly

        # Do it with the ref and a component ref in minimal and standard form
        compRef = ref.makeComponentRef("wcs")

        for test_item in (ref, ref.datasetType, compRef, compRef.datasetType):
            for minimal in (False, True):
                json_str = test_item.to_json(minimal=minimal)
                from_json = type(test_item).from_json(json_str, registry=butler.registry)
                self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using registry")

                # for minimal=False case also do a test without registry
                if not minimal:
                    from_json = type(test_item).from_json(json_str, universe=butler.dimensions)
                    self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using universe")

    def testJsonDimensionRecordsAndHtmlRepresentation(self):
        """Test JSON round-tripping of dimension records and check that their
        HTML representation can be produced.
        """
        # Dimension Records
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        for dimension in ("detector", "visit"):
            records = butler.registry.queryDimensionRecords(dimension, instrument="HSC")
            for r in records:
                for minimal in (True, False):
                    json_str = r.to_json(minimal=minimal)
                    r_json = type(r).from_json(json_str, registry=butler.registry)
                    self.assertEqual(r_json, r)
                    # check with direct method
                    simple = r.to_simple()
                    fromDirect = type(simple).direct(**json.loads(json_str))
                    self.assertEqual(simple, fromDirect)
                    # Also check equality of each of the components as dicts
                    self.assertEqual(r_json.toDict(), r.toDict())

                # check the html representation of records
                r_html = r._repr_html_()
                # Fixed: assertIsInstance gives a useful failure message,
                # unlike assertTrue(isinstance(...)).
                self.assertIsInstance(r_html, str)
                self.assertIn(dimension, r_html)

    def testWildcardQueries(self):
        """Test that different collection type queries work."""
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Create some collections
        created = {"collection", "u/user/test", "coll3"}
        for collection in created:
            butler.registry.registerCollection(collection, type=CollectionType.RUN)

        collections = butler.registry.queryCollections()
        self.assertEqual(set(collections), created)

        # (expression, expected match set) pairs covering literals, ...,
        # glob patterns, compiled regexes, and mixed tuples.
        expressions = (
            ("collection", {"collection"}),
            (..., created),
            ("*", created),
            (("collection", "*"), created),
            ("u/*", {"u/user/test"}),
            (re.compile("u.*"), {"u/user/test"}),
            (re.compile(".*oll.*"), {"collection", "coll3"}),
            ("*oll*", {"collection", "coll3"}),
            ((re.compile(r".*\d$"), "u/user/test"), {"coll3", "u/user/test"}),
            ("*[0-9]", {"coll3"}),
        )
        for expression, expected in expressions:
            result = butler.registry.queryCollections(expression)
            self.assertEqual(set(result), expected)

663 

class SimpleButlerMixedUUIDTestCase(SimpleButlerTestCase):
    """Repeat all `SimpleButlerTestCase` tests with the UUID-based datasets
    manager while seeding the repository from the YAML export that carries
    integer dataset IDs, exercising the mixed-ID import path.
    """

    datasetsImportFile = "datasets.yaml"

670 

671 

# Allow the test module to be run directly as a script.
if __name__ == "__main__":
    unittest.main()