# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import json
import os
import re
import tempfile
import unittest
from typing import Any

try:
    import numpy as np
except ImportError:
    np = None

import astropy.time
from lsst.daf.butler import Butler, ButlerConfig, CollectionType, DatasetId, DatasetRef, DatasetType, Timespan
from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults, _RegistryFactory
from lsst.daf.butler.tests import DatastoreMock
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir

TESTDIR = os.path.abspath(os.path.dirname(__file__))


class SimpleButlerTestCase(unittest.TestCase):
    """Tests for butler (including import/export functionality) that should not
    depend on the Registry Database backend or Datastore implementation, and
    can instead utilize an in-memory SQLite Registry and a mocked Datastore.
    """

    datasetsManager = (
        "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
    )
    datasetsImportFile = "datasets.yaml"

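    # The two class attributes above are the intended extension points: a
    # subclass can point these tests at a different datasets manager or
    # registry import payload without touching the test bodies. A minimal
    # sketch (the subclass and file name here are hypothetical):
    #
    #     class SimpleButlerAltDatasetsTestCase(SimpleButlerTestCase):
    #         datasetsImportFile = "datasets-alt.yaml"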

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.root)

    def makeButler(self, **kwargs: Any) -> Butler:
        """Return a new Butler instance on each call."""
        config = ButlerConfig()

        # Make a separate temporary directory for the registry of this
        # instance.
        tmpdir = tempfile.mkdtemp(dir=self.root)
        config["registry", "db"] = f"sqlite:///{tmpdir}/gen3.sqlite3"
        config["registry", "managers", "datasets"] = self.datasetsManager
        config["root"] = self.root

        # The registry database has to be created before a Butler can use it.
        registryConfig = RegistryConfig(config.get("registry"))
        _RegistryFactory(registryConfig).create_from_config()

        butler = Butler.from_config(config, **kwargs)
        DatastoreMock.apply(butler)
        return butler

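    # For reference, the tuple-key assignments in makeButler above amount to a
    # registry configuration roughly like the following (a sketch of the
    # assumed YAML layout, not copied from the repository defaults):
    #
    #     registry:
    #       db: sqlite:///<tmpdir>/gen3.sqlite3
    #       managers:
    #         datasets: <value of self.datasetsManager>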

    def comparableRef(self, ref: DatasetRef) -> DatasetRef:
        """Return a DatasetRef that can be compared to a DatasetRef from
        another repository.

        For repositories that do not support round-tripping of ID values this
        method returns an unresolved DatasetRef; for round-trip-safe repos it
        returns the ref unchanged.
        """
        return ref

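    # A subclass whose repository cannot round-trip dataset IDs would override
    # the hook above to strip or normalize the ID before comparison; the
    # cross-repository comparisons in the tests below only ever go through
    # this hook, so that is the single place such a repo flavor would need to
    # change.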

    def testReadBackwardsCompatibility(self):
        """Test that we can read an export file written by a previous version
        and committed to the daf_butler git repo.

        Notes
        -----
        At present this export file includes only dimension data, not datasets,
        which greatly limits the usefulness of this test. We should address
        this at some point, but I think it's best to wait for the changes to
        the export format required for CALIBRATION collections to land.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Spot-check a few things, but the most important test is just that
        # the above does not raise.
        self.assertGreaterEqual(
            {record.id for record in butler.registry.queryDimensionRecords("detector", instrument="HSC")},
            set(range(104)),  # should have all science CCDs; may have some focus ones.
        )
        self.assertGreaterEqual(
            {
                (record.id, record.physical_filter)
                for record in butler.registry.queryDimensionRecords("visit", instrument="HSC")
            },
            {
                (27136, "HSC-Z"),
                (11694, "HSC-G"),
                (23910, "HSC-R"),
                (11720, "HSC-Y"),
                (23900, "HSC-R"),
                (22646, "HSC-Y"),
                (1248, "HSC-I"),
                (19680, "HSC-I"),
                (1240, "HSC-I"),
                (424, "HSC-Y"),
                (19658, "HSC-I"),
                (344, "HSC-Y"),
                (1218, "HSC-R"),
                (1190, "HSC-Z"),
                (23718, "HSC-R"),
                (11700, "HSC-G"),
                (26036, "HSC-G"),
                (23872, "HSC-R"),
                (1170, "HSC-Z"),
                (1876, "HSC-Y"),
            },
        )

    def testDatasetTransfers(self):
        """Test exporting all datasets from a repo and then importing them all
        back in again.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
            # Import it all again.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
        datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets1))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets2))
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in datasets1],
            [self.comparableRef(ref) for ref in datasets2],
        )

    def testImportTwice(self):
        """Test exporting dimension records and datasets from a repo and then
        importing them all back in again twice.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
        butler2 = self.makeButler(writeable=True)
        # Import it once.
        butler2.import_(filename=file.name)
        # Import it again
        butler2.import_(filename=file.name)
        datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
        datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets1))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets2))
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in datasets1],
            [self.comparableRef(ref) for ref in datasets2],
        )

    def testCollectionTransfers(self):
        """Test exporting and then importing collections of various types."""
        # Populate a registry with some datasets.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        registry1 = butler1.registry
        # Add some more collections.
        registry1.registerRun("run1")
        registry1.registerCollection("tag1", CollectionType.TAGGED)
        registry1.registerCollection("calibration1", CollectionType.CALIBRATION)
        registry1.registerCollection("chain1", CollectionType.CHAINED)
        registry1.registerCollection("chain2", CollectionType.CHAINED)
        registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
        registry1.setCollectionChain("chain2", ["calibration1", "run1"])
        # Associate some datasets into the TAGGED and CALIBRATION collections.
        flats1 = list(registry1.queryDatasets("flat", collections=...))
        registry1.associate("tag1", flats1)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias1a = registry1.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g")
        bias2a = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry1.certify("calibration1", [bias2a, bias3a], Timespan(t1, t2))
        registry1.certify("calibration1", [bias2b], Timespan(t2, None))
        registry1.certify("calibration1", [bias3b], Timespan(t2, t3))
        registry1.certify("calibration1", [bias1a], Timespan.makeEmpty())

        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all collections, and some datasets.
            with butler1.export(filename=file.name) as exporter:
                # Sort results to put chain1 before chain2, which is
                # intentionally not topological order.
                for collection in sorted(registry1.queryCollections()):
                    exporter.saveCollection(collection)
                exporter.saveDatasets(flats1)
                exporter.saveDatasets([bias1a, bias2a, bias2b, bias3a, bias3b])
            # Import them into a new registry.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        registry2 = butler2.registry
        # Check that it all round-tripped, starting with the collections
        # themselves.
        self.assertIs(registry2.getCollectionType("run1"), CollectionType.RUN)
        self.assertIs(registry2.getCollectionType("tag1"), CollectionType.TAGGED)
        self.assertIs(registry2.getCollectionType("calibration1"), CollectionType.CALIBRATION)
        self.assertIs(registry2.getCollectionType("chain1"), CollectionType.CHAINED)
        self.assertIs(registry2.getCollectionType("chain2"), CollectionType.CHAINED)
        self.assertEqual(
            list(registry2.getCollectionChain("chain1")),
            ["tag1", "run1", "chain2"],
        )
        self.assertEqual(
            list(registry2.getCollectionChain("chain2")),
            ["calibration1", "run1"],
        )
        # Check that tag collection contents are the same.
        self.maxDiff = None
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in registry1.queryDatasets(..., collections="tag1")],
            [self.comparableRef(ref) for ref in registry2.queryDatasets(..., collections="tag1")],
        )
        # Check that calibration collection contents are the same.
        self.assertCountEqual(
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry1.queryDatasetAssociations("bias", collections="calibration1")
            ],
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry2.queryDatasetAssociations("bias", collections="calibration1")
            ],
        )

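    # For reference, the chained collections exercised above resolve as
    #
    #     chain1 -> [tag1, run1, chain2]
    #     chain2 -> [calibration1, run1]
    #
    # so a dataset search through chain1 also covers everything reachable via
    # chain2; the test deliberately exports chain1 before chain2 to check that
    # import does not rely on topological ordering.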

    def testButlerGet(self):
        """Test that butler.get can work with different variants."""
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))

        # Find the DatasetRef for a flat
        coll = "imported_g"
        flat2g = butler.find_dataset(
            "flat", instrument="Cam1", full_name="Ab", physical_filter="Cam1-G", collections=coll
        )

        # Create a numpy integer to check that works fine
        detector_np = np.int64(2) if np else 2

        # Try to get it using different variations of dataId + keyword
        # arguments
        # Note that instrument.class_name does not work
        variants = (
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            (None, {"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}),
            ({"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": 2}, {"physical_filter": "Cam1-G"}),
            ({"detector.full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"full_name": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"detector": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b", "raft": "A"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b"}, {"raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"name_in_raft": "b", "raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (
                {"detector.name_in_raft": "b", "detector.raft": "A"},
                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
            ),
            (
                {
                    "detector.name_in_raft": "b",
                    "detector.raft": "A",
                    "instrument": "Cam1",
                    "physical_filter": "Cam1-G",
                },
                {},
            ),
            # Duplicate (but valid) information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "A", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "A", "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
        )

        for dataId, kwds in variants:
            try:
                flat_id, _ = butler.get("flat", dataId=dataId, collections=coll, **kwds)
            except Exception as e:
                e.add_note(f"dataId={dataId}, kwds={kwds}")
                raise
            self.assertEqual(flat_id, flat2g.id, msg=f"DataId: {dataId}, kwds: {kwds}")

        # Check that bad combinations raise.
        variants = (
            # Inconsistent detector information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "B", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
            ({"detector": 12}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
            ({"raft": "B"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "B"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
            # Under-specified.
            ({"raft": "B"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            # Spurious kwargs.
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G", "x": "y"}),
            ({"x": "y"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
        )
        for dataId, kwds in variants:
            with self.assertRaises((ValueError, LookupError)):
                butler.get("flat", dataId=dataId, collections=coll, **kwds)

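    # The accepted variants above all expand to the same data ID, so e.g. the
    # dict form and the all-keyword form of the same call are interchangeable
    # (sketch using the names from the test above):
    #
    #     butler.get("flat", {"instrument": "Cam1", "detector": 2},
    #                physical_filter="Cam1-G", collections=coll)
    #     butler.get("flat", instrument="Cam1", detector=2,
    #                physical_filter="Cam1-G", collections=coll)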

    def testGetCalibration(self):
        """Test that `Butler.get` can be used to fetch from
        `~CollectionType.CALIBRATION` collections if the data ID includes
        extra dimensions with temporal information.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Certify some biases into a CALIBRATION collection.
        registry = butler.registry
        registry.registerCollection("calibs", CollectionType.CALIBRATION)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry.certify("calibs", [bias2a, bias3a], Timespan(t1, t2))
        registry.certify("calibs", [bias2b], Timespan(t2, None))
        registry.certify("calibs", [bias3b], Timespan(t2, t3))
        # Insert some exposure dimension data.
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 3,
                "obs_id": "three",
                "timespan": Timespan(t1, t2),
                "physical_filter": "Cam1-G",
                "day_obs": 20201114,
                "seq_num": 55,
            },
            {
                "instrument": "Cam1",
                "id": 4,
                "obs_id": "four",
                "timespan": Timespan(t2, t3),
                "physical_filter": "Cam1-G",
                "day_obs": 20211114,
                "seq_num": 42,
            },
        )
        # Get some biases from raw-like data IDs.
        bias2a_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 3, "detector": 2}, collections="calibs"
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 4, "detector": 3}, collections="calibs"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Get using the kwarg form
        bias3b_id, _ = butler.get("bias", instrument="Cam1", exposure=4, detector=3, collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Do it again but using the record information
        bias2a_id, _ = butler.get(
            "bias",
            {"instrument": "Cam1", "exposure.obs_id": "three", "detector.full_name": "Ab"},
            collections="calibs",
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias",
            {"exposure.obs_id": "four", "detector.full_name": "Ba"},
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary.
        bias3b_id, _ = butler.get(
            "bias", {"exposure": "four", "detector": "Ba"}, collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary and do it in the keyword arguments.
        bias3b_id, _ = butler.get(
            "bias", exposure="four", detector="Ba", collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Now with implied record columns
        bias3b_id, _ = butler.get(
            "bias",
            day_obs=20211114,
            seq_num=42,
            raft="B",
            name_in_raft="a",
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Allow a fully-specified dataId and unnecessary extra information
        # that comes from the record.
        bias3b_id, _ = butler.get(
            "bias",
            dataId=dict(
                exposure=4,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                instrument="Cam1",
            ),
            collections="calibs",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Extra but inconsistent record values are a problem.
        with self.assertRaises(ValueError):
            bias3b_id, _ = butler.get(
                "bias",
                exposure=3,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                collections="calibs",
                instrument="Cam1",
            )

        # Ensure that spurious kwargs cause an exception.
        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                {"exposure.obs_id": "four", "immediate": True, "detector.full_name": "Ba"},
                collections="calibs",
                instrument="Cam1",
            )

        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                day_obs=20211114,
                seq_num=42,
                raft="B",
                name_in_raft="a",
                collections="calibs",
                instrument="Cam1",
                immediate=True,
            )

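    # In the calibration lookups above it is the exposure's timespan that
    # selects the certified bias: exposure 3 spans [t1, t2) and so resolves to
    # the biases certified with Timespan(t1, t2), while exposure 4 spans
    # [t2, t3) and resolves to the ones certified from t2 onwards.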

    def testRegistryDefaults(self):
        """Test that we can default the collections and some data ID keys when
        constructing a butler.

        Many tests that use a default run already exist in ``test_butler.py``,
        so that isn't tested here. And while most of this functionality is
        implemented in `Registry`, we test it here instead of
        ``daf/butler/tests/registry.py`` because it shouldn't depend on the
        database backend at all.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.find_dataset("flat", detector=2, physical_filter="Cam1-G")
        # Do the same with Butler.get; this should ultimately invoke a lot of
        # the same code, so it's a bit circular, but mostly we're checking that
        # it works at all.
        dataset_id, _ = butler.get("flat", detector=2, physical_filter="Cam1-G")
        self.assertEqual(ref.id, dataset_id)
        # Query for datasets. Test defaulting the data ID in both kwargs and
        # in the WHERE expression.
        queried_refs_1 = set(butler.registry.queryDatasets("flat", detector=2, physical_filter="Cam1-G"))
        self.assertEqual({ref}, queried_refs_1)
        queried_refs_2 = set(
            butler.registry.queryDatasets("flat", where="detector=2 AND physical_filter='Cam1-G'")
        )
        self.assertEqual({ref}, queried_refs_2)
        # Query for data IDs with a dataset constraint.
        queried_data_ids = set(
            butler.registry.queryDataIds(
                {"instrument", "detector", "physical_filter"},
                datasets={"flat"},
                detector=2,
                physical_filter="Cam1-G",
            )
        )
        self.assertEqual({ref.dataId}, queried_data_ids)
        # Add another instrument to the repo, and a dataset that uses it to
        # the `imported_g` collection.
        butler.registry.insertDimensionData("instrument", {"name": "Cam2"})
        camera = DatasetType(
            "camera",
            dimensions=butler.dimensions["instrument"].graph,
            storageClass="Camera",
        )
        butler.registry.registerDatasetType(camera)
        butler.registry.insertDatasets(camera, [{"instrument": "Cam2"}], run="imported_g")
        # Initialize a new butler with `imported_g` as its default run.
        # This should not have a default instrument, because there are two.
        # Pass run instead of collections; this should set both.
        butler2 = Butler.from_config(butler=butler, run="imported_g")
        self.assertEqual(list(butler2.registry.defaults.collections), ["imported_g"])
        self.assertEqual(butler2.registry.defaults.run, "imported_g")
        self.assertFalse(butler2.registry.defaults.dataId)
        # Initialize a new butler with an instrument default explicitly given.
        # Set collections instead of run, which should then be None.
        butler3 = Butler.from_config(butler=butler, collections=["imported_g"], instrument="Cam2")
        self.assertEqual(list(butler3.registry.defaults.collections), ["imported_g"])
        self.assertIsNone(butler3.registry.defaults.run)
        self.assertEqual(butler3.registry.defaults.dataId.required, {"instrument": "Cam2"})

        # Check that repr() does not fail.
        defaults = RegistryDefaults(collections=["imported_g"], run="test")
        r = repr(defaults)
        self.assertIn("collections=('imported_g',)", r)
        self.assertIn("run='test'", r)

        defaults = RegistryDefaults(run="test", instrument="DummyCam", skypix="pix")
        r = repr(defaults)
        self.assertIn("skypix='pix'", r)
        self.assertIn("instrument='DummyCam'", r)

    def testJson(self):
        """Test JSON serialization mediated by registry."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.find_dataset("flat", detector=2, physical_filter="Cam1-G")

        # Transform the ref and dataset type to and from JSON
        # and check that it can be reconstructed properly

        # Do it with the ref and a component ref in minimal and standard form
        compRef = ref.makeComponentRef("wcs")

        for test_item in (ref, ref.datasetType, compRef, compRef.datasetType):
            for minimal in (False, True):
                json_str = test_item.to_json(minimal=minimal)
                from_json = type(test_item).from_json(json_str, registry=butler.registry)
                self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using registry")

                # for minimal=False case also do a test without registry
                if not minimal:
                    from_json = type(test_item).from_json(json_str, universe=butler.dimensions)
                    self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using universe")

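    # Note on the loop above: the minimal JSON form only carries enough
    # information to look the object up again, so reconstructing it requires a
    # registry, while the full (minimal=False) form is self-describing and can
    # also be rebuilt from just the dimension universe.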

    def test_populated_by(self):
        """Test that dimension records can find other records."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        elements = frozenset(element for element in butler.dimensions.elements if element.has_own_table)

        # Get a visit-based dataId.
        data_ids = set(butler.registry.queryDataIds("visit", visit=1232, instrument="HSC"))

        # Request all the records related to it.
        records = butler._extract_all_dimension_records_from_data_ids(butler, data_ids, elements)

        self.assertIn(butler.dimensions["visit_detector_region"], records, f"Keys: {records.keys()}")
        self.assertIn(butler.dimensions["visit_system_membership"], records)
        self.assertIn(butler.dimensions["visit_system"], records)

    def testJsonDimensionRecordsAndHtmlRepresentation(self):
        """Test JSON round-tripping and the HTML representation of dimension
        records.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        for dimension in ("detector", "visit"):
            records = butler.registry.queryDimensionRecords(dimension, instrument="HSC")
            for r in records:
                for minimal in (True, False):
                    json_str = r.to_json(minimal=minimal)
                    r_json = type(r).from_json(json_str, registry=butler.registry)
                    self.assertEqual(r_json, r)
                    # check with direct method
                    simple = r.to_simple()
                    fromDirect = type(simple).direct(**json.loads(json_str))
                    self.assertEqual(simple, fromDirect)
                    # Also check equality of each of the components as dicts
                    self.assertEqual(r_json.toDict(), r.toDict())

                # check the html representation of records
                r_html = r._repr_html_()
                self.assertTrue(isinstance(r_html, str))
                self.assertIn(dimension, r_html)

    def testWildcardQueries(self):
        """Test that different collection type queries work."""
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Create some collections
        created = {"collection", "u/user/test", "coll3"}
        for collection in created:
            butler.registry.registerCollection(collection, type=CollectionType.RUN)

        collections = butler.registry.queryCollections()
        self.assertEqual(set(collections), created)

        expressions = (
            ("collection", {"collection"}),
            (..., created),
            ("*", created),
            (("collection", "*"), created),
            ("u/*", {"u/user/test"}),
            (re.compile("u.*"), {"u/user/test"}),
            (re.compile(".*oll.*"), {"collection", "coll3"}),
            ("*oll*", {"collection", "coll3"}),
            ((re.compile(r".*\d$"), "u/user/test"), {"coll3", "u/user/test"}),
            ("*[0-9]", {"coll3"}),
        )
        for expression, expected in expressions:
            result = butler.registry.queryCollections(expression)
            self.assertEqual(set(result), expected)
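        # A note on the expressions table above: plain strings are matched as
        # shell-style globs ("*" and character classes), compiled re.Pattern
        # objects are matched as regular expressions, and ... (Ellipsis)
        # selects every collection.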


if __name__ == "__main__":
    unittest.main()