Coverage for tests/test_simpleButler.py: 11%

256 statements  

coverage.py v7.2.7, created at 2023-06-23 09:30 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import json
import os
import re
import tempfile
import unittest
from typing import Any

try:
    import numpy as np
except ImportError:
    np = None

import astropy.time
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionType,
    DatasetId,
    DatasetRef,
    DatasetType,
    Registry,
    Timespan,
)
from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults
from lsst.daf.butler.tests import DatastoreMock
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir

TESTDIR = os.path.abspath(os.path.dirname(__file__))


class SimpleButlerTestCase(unittest.TestCase):
    """Tests for butler (including import/export functionality) that should
    not depend on the Registry database backend or Datastore implementation,
    and can instead use a temporary SQLite registry and a mocked Datastore.
    """

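    # Configuration used by makeButler below: the dataset-record storage
    # manager class and the YAML file of datasets to import. The subclass
    # SimpleButlerMixedUUIDTestCase overrides the import file to feed
    # integer-ID datasets to the UUID-based manager.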

    datasetsManager = (
        "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
    )
    datasetsImportFile = "datasets-uuid.yaml"

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.root)

    def makeButler(self, **kwargs: Any) -> Butler:
        """Return a new Butler instance on each call."""
        config = ButlerConfig()

        # Make a separate temporary directory for this instance's registry.
        tmpdir = tempfile.mkdtemp(dir=self.root)
        config["registry", "db"] = f"sqlite:///{tmpdir}/gen3.sqlite3"
        config["registry", "managers", "datasets"] = self.datasetsManager
        config["root"] = self.root

        # The registry has to be created before the Butler can be constructed.
        registryConfig = RegistryConfig(config.get("registry"))
        Registry.createFromConfig(registryConfig)

        butler = Butler(config, **kwargs)
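        # DatastoreMock (applied below) replaces the real datastore, so
        # butler.get() in these tests does not return actual datasets; the
        # tests unpack its result as (dataset_id, _) and compare only the ID.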

        DatastoreMock.apply(butler)
        return butler

    def comparableRef(self, ref: DatasetRef) -> DatasetRef:
        """Return a DatasetRef that can be compared to a DatasetRef from
        another repository.

        For repositories that do not support round-tripping of ID values this
        method returns an unresolved DatasetRef; for round-trip-safe
        repositories it returns the ref unchanged.
        """
        return ref

    def testReadBackwardsCompatibility(self):
        """Test that we can read an export file written by a previous version
        and committed to the daf_butler git repo.

        Notes
        -----
        At present this export file includes only dimension data, not datasets,
        which greatly limits the usefulness of this test. We should address
        this at some point, but I think it's best to wait for the changes to
        the export format required for CALIBRATION collections to land.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Spot-check a few things, but the most important test is just that
        # the above does not raise.
        self.assertGreaterEqual(
            {record.id for record in butler.registry.queryDimensionRecords("detector", instrument="HSC")},
            set(range(104)),  # should have all science CCDs; may have some focus ones.
        )
        self.assertGreaterEqual(
            {
                (record.id, record.physical_filter)
                for record in butler.registry.queryDimensionRecords("visit", instrument="HSC")
            },
            {
                (27136, "HSC-Z"),
                (11694, "HSC-G"),
                (23910, "HSC-R"),
                (11720, "HSC-Y"),
                (23900, "HSC-R"),
                (22646, "HSC-Y"),
                (1248, "HSC-I"),
                (19680, "HSC-I"),
                (1240, "HSC-I"),
                (424, "HSC-Y"),
                (19658, "HSC-I"),
                (344, "HSC-Y"),
                (1218, "HSC-R"),
                (1190, "HSC-Z"),
                (23718, "HSC-R"),
                (11700, "HSC-G"),
                (26036, "HSC-G"),
                (23872, "HSC-R"),
                (1170, "HSC-Z"),
                (1876, "HSC-Y"),
            },
        )

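    # In the export/import round-trip tests below, ``...`` (Ellipsis) is the
    # registry query wildcard meaning "all dataset types" / "all collections".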

    def testDatasetTransfers(self):
        """Test exporting all datasets from a repo and then importing them all
        back in again.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
            # Import it all again.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
        datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets1))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets2))
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in datasets1],
            [self.comparableRef(ref) for ref in datasets2],
        )

    def testImportTwice(self):
        """Test exporting dimension records and datasets from a repo and then
        importing them all back in again twice.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
        butler2 = self.makeButler(writeable=True)
        # Import it once.
        butler2.import_(filename=file.name)
        # Import it again.
        butler2.import_(filename=file.name)
        datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
        datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets1))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets2))
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in datasets1],
            [self.comparableRef(ref) for ref in datasets2],
        )

    def testCollectionTransfers(self):
        """Test exporting and then importing collections of various types."""
        # Populate a registry with some datasets.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        registry1 = butler1.registry
        # Add some more collections.
        registry1.registerRun("run1")
        registry1.registerCollection("tag1", CollectionType.TAGGED)
        registry1.registerCollection("calibration1", CollectionType.CALIBRATION)
        registry1.registerCollection("chain1", CollectionType.CHAINED)
        registry1.registerCollection("chain2", CollectionType.CHAINED)
        registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
        registry1.setCollectionChain("chain2", ["calibration1", "run1"])
        # Associate some datasets into the TAGGED and CALIBRATION collections.
        flats1 = list(registry1.queryDatasets("flat", collections=...))
        registry1.associate("tag1", flats1)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias1a = registry1.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g")
        bias2a = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
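        # Certify the biases with different validity ranges: bias2a/bias3a
        # from t1 to t2, bias2b from t2 with no end, bias3b from t2 to t3,
        # and bias1a with an empty timespan.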

        registry1.certify("calibration1", [bias2a, bias3a], Timespan(t1, t2))
        registry1.certify("calibration1", [bias2b], Timespan(t2, None))
        registry1.certify("calibration1", [bias3b], Timespan(t2, t3))
        registry1.certify("calibration1", [bias1a], Timespan.makeEmpty())

        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all collections, and some datasets.
            with butler1.export(filename=file.name) as exporter:
                # Sort results to put chain1 before chain2, which is
                # intentionally not topological order.
                for collection in sorted(registry1.queryCollections()):
                    exporter.saveCollection(collection)
                exporter.saveDatasets(flats1)
                exporter.saveDatasets([bias1a, bias2a, bias2b, bias3a, bias3b])
            # Import them into a new registry.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        registry2 = butler2.registry
        # Check that it all round-tripped, starting with the collections
        # themselves.
        self.assertIs(registry2.getCollectionType("run1"), CollectionType.RUN)
        self.assertIs(registry2.getCollectionType("tag1"), CollectionType.TAGGED)
        self.assertIs(registry2.getCollectionType("calibration1"), CollectionType.CALIBRATION)
        self.assertIs(registry2.getCollectionType("chain1"), CollectionType.CHAINED)
        self.assertIs(registry2.getCollectionType("chain2"), CollectionType.CHAINED)
        self.assertEqual(
            list(registry2.getCollectionChain("chain1")),
            ["tag1", "run1", "chain2"],
        )
        self.assertEqual(
            list(registry2.getCollectionChain("chain2")),
            ["calibration1", "run1"],
        )
        # Check that tag collection contents are the same.
        self.maxDiff = None
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in registry1.queryDatasets(..., collections="tag1")],
            [self.comparableRef(ref) for ref in registry2.queryDatasets(..., collections="tag1")],
        )
        # Check that calibration collection contents are the same.
        self.assertCountEqual(
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry1.queryDatasetAssociations("bias", collections="calibration1")
            ],
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry2.queryDatasetAssociations("bias", collections="calibration1")
            ],
        )

    def testButlerGet(self):
        """Test that butler.get works with different combinations of dataId
        and keyword arguments.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))

        # Find the DatasetRef for a flat.
        coll = "imported_g"
        flat2g = butler.registry.findDataset(
            "flat", instrument="Cam1", detector=2, physical_filter="Cam1-G", collections=coll
        )

        # Create a numpy integer to check that numpy types work as data ID
        # values.
        detector_np = np.int64(2) if np else 2
        print(type(detector_np))

        # Try to get it using different variations of dataId plus keyword
        # arguments.
        # Note that instrument.class_name does not work.
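        # Each variant is a (dataId, kwargs) pair; butler.get combines the two
        # and every combination should resolve to the same flat dataset.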

        variants = (
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            (None, {"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}),
            ({"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": 2}, {"physical_filter": "Cam1-G"}),
            ({"detector.full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"full_name": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"detector": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b", "raft": "A"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b"}, {"raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"name_in_raft": "b", "raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (
                {"detector.name_in_raft": "b", "detector.raft": "A"},
                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
            ),
            (
                {
                    "detector.name_in_raft": "b",
                    "detector.raft": "A",
                    "instrument": "Cam1",
                    "physical_filter": "Cam1-G",
                },
                {},
            ),
            # Duplicate (but valid) information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "A", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "A", "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
        )
327 

328 for dataId, kwds in variants: 

329 try: 

330 flat_id, _ = butler.get("flat", dataId=dataId, collections=coll, **kwds) 

331 except Exception as e: 

332 raise type(e)(f"{str(e)}: dataId={dataId}, kwds={kwds}") from e 

333 self.assertEqual(flat_id, flat2g.id, msg=f"DataId: {dataId}, kwds: {kwds}") 

334 

335 # Check that bad combinations raise. 

336 variants = ( 

337 # Inconsistent detector information. 

338 (None, {"instrument": "Cam1", "detector": 2, "raft": "B", "physical_filter": "Cam1-G"}), 

339 ({"detector": 2}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}), 

340 ({"detector": 12}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}), 

341 ({"raft": "B"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}), 

342 ({"raft": "B"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}), 

343 # Under-specified. 

344 ({"raft": "B"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}), 

345 # Spurious kwargs. 

346 (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G", "x": "y"}), 

347 ({"x": "y"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}), 

348 ) 

349 for dataId, kwds in variants: 

350 with self.assertRaises((ValueError, LookupError)): 

351 butler.get("flat", dataId=dataId, collections=coll, **kwds) 

352 

    def testGetCalibration(self):
        """Test that `Butler.get` can be used to fetch from
        `~CollectionType.CALIBRATION` collections if the data ID includes
        extra dimensions with temporal information.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Certify some biases into a CALIBRATION collection.
        registry = butler.registry
        registry.registerCollection("calibs", CollectionType.CALIBRATION)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry.certify("calibs", [bias2a, bias3a], Timespan(t1, t2))
        registry.certify("calibs", [bias2b], Timespan(t2, None))
        registry.certify("calibs", [bias3b], Timespan(t2, t3))
        # Insert some exposure dimension data.
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 3,
                "obs_id": "three",
                "timespan": Timespan(t1, t2),
                "physical_filter": "Cam1-G",
                "day_obs": 20201114,
                "seq_num": 55,
            },
            {
                "instrument": "Cam1",
                "id": 4,
                "obs_id": "four",
                "timespan": Timespan(t2, t3),
                "physical_filter": "Cam1-G",
                "day_obs": 20211114,
                "seq_num": 42,
            },
        )
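        # The exposure timespans determine which certified bias is selected
        # when a raw-like (exposure-based) data ID is used below: exposure 3
        # overlaps the first validity range, exposure 4 the second.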

        # Get some biases from raw-like data IDs.
        bias2a_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 3, "detector": 2}, collections="calibs"
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 4, "detector": 3}, collections="calibs"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Get using the kwarg form
        bias3b_id, _ = butler.get("bias", instrument="Cam1", exposure=4, detector=3, collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Do it again but using the record information
        bias2a_id, _ = butler.get(
            "bias",
            {"instrument": "Cam1", "exposure.obs_id": "three", "detector.full_name": "Ab"},
            collections="calibs",
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias",
            {"exposure.obs_id": "four", "detector.full_name": "Ba"},
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary.
        bias3b_id, _ = butler.get(
            "bias", {"exposure": "four", "detector": "Ba"}, collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary and do it in the keyword arguments.
        bias3b_id, _ = butler.get(
            "bias", exposure="four", detector="Ba", collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Now with implied record columns
        bias3b_id, _ = butler.get(
            "bias",
            day_obs=20211114,
            seq_num=42,
            raft="B",
            name_in_raft="a",
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Allow a fully-specified dataId and unnecessary extra information
        # that comes from the record.
        bias3b_id, _ = butler.get(
            "bias",
            dataId=dict(
                exposure=4,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                instrument="Cam1",
            ),
            collections="calibs",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Extra but inconsistent record values are a problem.
        with self.assertRaises(ValueError):
            bias3b_id, _ = butler.get(
                "bias",
                exposure=3,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                collections="calibs",
                instrument="Cam1",
            )

        # Ensure that spurious kwargs cause an exception.
        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                {"exposure.obs_id": "four", "immediate": True, "detector.full_name": "Ba"},
                collections="calibs",
                instrument="Cam1",
            )

        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                day_obs=20211114,
                seq_num=42,
                raft="B",
                name_in_raft="a",
                collections="calibs",
                instrument="Cam1",
                immediate=True,
            )

    def testRegistryDefaults(self):
        """Test that we can default the collections and some data ID keys when
        constructing a butler.

        Many tests that use a default run already exist in ``test_butler.py``,
        so that isn't tested here. And while most of this functionality is
        implemented in `Registry`, we test it here instead of
        ``daf/butler/tests/registry.py`` because it shouldn't depend on the
        database backend at all.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.registry.findDataset("flat", detector=2, physical_filter="Cam1-G")
        # Do the same with Butler.get; this should ultimately invoke a lot of
        # the same code, so it's a bit circular, but mostly we're checking that
        # it works at all.
        dataset_id, _ = butler.get("flat", detector=2, physical_filter="Cam1-G")
        self.assertEqual(ref.id, dataset_id)
        # Query for datasets. Test defaulting the data ID in both kwargs and
        # in the WHERE expression.
        queried_refs_1 = set(butler.registry.queryDatasets("flat", detector=2, physical_filter="Cam1-G"))
        self.assertEqual({ref}, queried_refs_1)
        queried_refs_2 = set(
            butler.registry.queryDatasets("flat", where="detector=2 AND physical_filter='Cam1-G'")
        )
        self.assertEqual({ref}, queried_refs_2)
        # Query for data IDs with a dataset constraint.
        queried_data_ids = set(
            butler.registry.queryDataIds(
                {"instrument", "detector", "physical_filter"},
                datasets={"flat"},
                detector=2,
                physical_filter="Cam1-G",
            )
        )
        self.assertEqual({ref.dataId}, queried_data_ids)
        # Add another instrument to the repo, and a dataset that uses it to
        # the `imported_g` collection.
        butler.registry.insertDimensionData("instrument", {"name": "Cam2"})
        camera = DatasetType(
            "camera",
            dimensions=butler.dimensions["instrument"].graph,
            storageClass="Camera",
        )
        butler.registry.registerDatasetType(camera)
        butler.registry.insertDatasets(camera, [{"instrument": "Cam2"}], run="imported_g")
        # Initialize a new butler with `imported_g` as its default run.
        # This should not have a default instrument, because there are two.
        # Pass run instead of collections; this should set both.
        butler2 = Butler(butler=butler, run="imported_g")
        self.assertEqual(list(butler2.registry.defaults.collections), ["imported_g"])
        self.assertEqual(butler2.registry.defaults.run, "imported_g")
        self.assertFalse(butler2.registry.defaults.dataId)
        # Initialize a new butler with an instrument default explicitly given.
        # Set collections instead of run, which should then be None.
        butler3 = Butler(butler=butler, collections=["imported_g"], instrument="Cam2")
        self.assertEqual(list(butler3.registry.defaults.collections), ["imported_g"])
        self.assertIsNone(butler3.registry.defaults.run)
        self.assertEqual(butler3.registry.defaults.dataId.byName(), {"instrument": "Cam2"})

        # Check that repr() does not fail.
        defaults = RegistryDefaults(collections=["imported_g"], run="test")
        r = repr(defaults)
        self.assertIn("collections=('imported_g',)", r)
        self.assertIn("run='test'", r)

        defaults = RegistryDefaults(run="test", instrument="DummyCam", skypix="pix")
        r = repr(defaults)
        self.assertIn("skypix='pix'", r)
        self.assertIn("instrument='DummyCam'", r)

    def testJson(self):
        """Test JSON serialization mediated by registry."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.registry.findDataset("flat", detector=2, physical_filter="Cam1-G")

        # Transform the ref and dataset type to and from JSON
        # and check that it can be reconstructed properly

        # Do it with the ref and a component ref in minimal and standard form
        compRef = ref.makeComponentRef("wcs")

        for test_item in (ref, ref.datasetType, compRef, compRef.datasetType):
            for minimal in (False, True):
                json_str = test_item.to_json(minimal=minimal)
                from_json = type(test_item).from_json(json_str, registry=butler.registry)
                self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using registry")

                # for minimal=False case also do a test without registry
                if not minimal:
                    from_json = type(test_item).from_json(json_str, universe=butler.dimensions)
                    self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using universe")

    def testJsonDimensionRecordsAndHtmlRepresentation(self):
        """Test JSON round-tripping of dimension records and their HTML
        representation.
        """
        # Dimension Records
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        for dimension in ("detector", "visit"):
            records = butler.registry.queryDimensionRecords(dimension, instrument="HSC")
            for r in records:
                for minimal in (True, False):
                    json_str = r.to_json(minimal=minimal)
                    r_json = type(r).from_json(json_str, registry=butler.registry)
                    self.assertEqual(r_json, r)
                    # check with direct method
                    simple = r.to_simple()
                    fromDirect = type(simple).direct(**json.loads(json_str))
                    self.assertEqual(simple, fromDirect)
                    # Also check equality of each of the components as dicts
                    self.assertEqual(r_json.toDict(), r.toDict())

                # check the html representation of records
                r_html = r._repr_html_()
                self.assertIsInstance(r_html, str)
                self.assertIn(dimension, r_html)

    def testWildcardQueries(self):
        """Test that collection queries work with different expression types."""

        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Create some collections.
        created = {"collection", "u/user/test", "coll3"}
        for collection in created:
            butler.registry.registerCollection(collection, type=CollectionType.RUN)

        collections = butler.registry.queryCollections()
        self.assertEqual(set(collections), created)

        expressions = (
            ("collection", {"collection"}),
            (..., created),
            ("*", created),
            (("collection", "*"), created),
            ("u/*", {"u/user/test"}),
            (re.compile("u.*"), {"u/user/test"}),
            (re.compile(".*oll.*"), {"collection", "coll3"}),
            ("*oll*", {"collection", "coll3"}),
            ((re.compile(r".*\d$"), "u/user/test"), {"coll3", "u/user/test"}),
            ("*[0-9]", {"coll3"}),
        )
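        # Each entry pairs a collection-search expression (exact name, ``...``
        # wildcard, shell-style glob, compiled regex, or a tuple mixing them)
        # with the collections it is expected to match.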

        for expression, expected in expressions:
            result = butler.registry.queryCollections(expression)
            self.assertEqual(set(result), expected)


class SimpleButlerMixedUUIDTestCase(SimpleButlerTestCase):
    """Same as SimpleButlerTestCase but uses the UUID-based datasets manager
    and loads datasets from a YAML file with integer IDs.
    """

    datasetsImportFile = "datasets.yaml"


if __name__ == "__main__":
    unittest.main()