Coverage for tests/test_simpleButler.py: 10%

285 statements  

« prev     ^ index     » next       coverage.py v7.4.3, created at 2024-03-12 10:07 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30import json 

31import os 

32import re 

33import tempfile 

34import unittest 

35from typing import Any 

36 

try:
    import numpy as np
except ImportError:
    # numpy is an optional dependency for these tests; when it is absent the
    # tests below fall back to plain Python ints (see testButlerGet).
    np = None

41 

42import astropy.time 

43from lsst.daf.butler import ( 

44 Butler, 

45 ButlerConfig, 

46 CollectionType, 

47 DataCoordinate, 

48 DatasetId, 

49 DatasetRef, 

50 DatasetType, 

51 StorageClass, 

52 Timespan, 

53) 

54from lsst.daf.butler.datastore.file_templates import FileTemplate 

55from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults, _RegistryFactory 

56from lsst.daf.butler.tests import DatastoreMock 

57from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir 

58 

# Absolute path to the directory holding this test file; used to locate the
# export files under ``data/registry``.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

60 

61 

class SimpleButlerTestCase(unittest.TestCase):
    """Tests for butler (including import/export functionality) that should not
    depend on the Registry Database backend or Datastore implementation, and
    can instead utilize an in-memory SQLite Registry and a mocked Datastore.
    """

    # Fully-qualified name of the dataset-record storage manager class to
    # configure in the registry; subclasses may override to exercise a
    # different manager implementation.
    datasetsManager = (
        "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
    )
    # Name (under tests/data/registry) of the export file used to seed
    # datasets; subclasses may override.
    datasetsImportFile = "datasets.yaml"

72 

    def setUp(self) -> None:
        # Each test gets its own scratch directory under the test directory;
        # makeButler() creates per-instance registries inside it.
        self.root = makeTestTempDir(TESTDIR)

75 

    def tearDown(self) -> None:
        # Remove the scratch directory and everything created inside it.
        removeTestTempDir(self.root)

78 

79 def makeButler(self, **kwargs: Any) -> Butler: 

80 """Return new Butler instance on each call.""" 

81 config = ButlerConfig() 

82 

83 # make separate temporary directory for registry of this instance 

84 tmpdir = tempfile.mkdtemp(dir=self.root) 

85 config["registry", "db"] = f"sqlite:///{tmpdir}/gen3.sqlite3" 

86 config["registry", "managers", "datasets"] = self.datasetsManager 

87 config["root"] = self.root 

88 

89 # have to make a registry first 

90 registryConfig = RegistryConfig(config.get("registry")) 

91 _RegistryFactory(registryConfig).create_from_config() 

92 

93 butler = Butler.from_config(config, **kwargs) 

94 DatastoreMock.apply(butler) 

95 return butler 

96 

    def comparableRef(self, ref: DatasetRef) -> DatasetRef:
        """Return a DatasetRef that can be compared to a DatasetRef from
        other repository.

        For repositories that do not support round-trip of ID values this
        method returns unresolved DatasetRef, for round-trip-safe repos it
        returns unchanged ref.
        """
        # This class configures a UUID-based datasets manager (see
        # ``datasetsManager``), so IDs round-trip and the ref is returned
        # unchanged; subclasses with non-round-trip IDs may override.
        return ref

106 

    def testReadBackwardsCompatibility(self):
        """Test that we can read an export file written by a previous version
        and commit to the daf_butler git repo.

        Notes
        -----
        At present this export file includes only dimension data, not datasets,
        which greatly limits the usefulness of this test. We should address
        this at some point, but I think it's best to wait for the changes to
        the export format required for CALIBRATION collections to land.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Spot-check a few things, but the most important test is just that
        # the above does not raise.
        self.assertGreaterEqual(
            {record.id for record in butler.registry.queryDimensionRecords("detector", instrument="HSC")},
            set(range(104)),  # should have all science CCDs; may have some focus ones.
        )
        # A sample of (visit, physical_filter) pairs known to be in the export
        # must all be present (superset check) after the import.
        self.assertGreaterEqual(
            {
                (record.id, record.physical_filter)
                for record in butler.registry.queryDimensionRecords("visit", instrument="HSC")
            },
            {
                (27136, "HSC-Z"),
                (11694, "HSC-G"),
                (23910, "HSC-R"),
                (11720, "HSC-Y"),
                (23900, "HSC-R"),
                (22646, "HSC-Y"),
                (1248, "HSC-I"),
                (19680, "HSC-I"),
                (1240, "HSC-I"),
                (424, "HSC-Y"),
                (19658, "HSC-I"),
                (344, "HSC-Y"),
                (1218, "HSC-R"),
                (1190, "HSC-Z"),
                (23718, "HSC-R"),
                (11700, "HSC-G"),
                (26036, "HSC-G"),
                (23872, "HSC-R"),
                (1170, "HSC-Z"),
                (1876, "HSC-Y"),
            },
        )

154 

155 def testDatasetTransfers(self): 

156 """Test exporting all datasets from a repo and then importing them all 

157 back in again. 

158 """ 

159 # Import data to play with. 

160 butler1 = self.makeButler(writeable=True) 

161 butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

162 butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile)) 

163 with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file: 

164 # Export all datasets. 

165 with butler1.export(filename=file.name) as exporter: 

166 exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...)) 

167 # Import it all again. 

168 butler2 = self.makeButler(writeable=True) 

169 butler2.import_(filename=file.name) 

170 datasets1 = list(butler1.registry.queryDatasets(..., collections=...)) 

171 datasets2 = list(butler2.registry.queryDatasets(..., collections=...)) 

172 self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets1)) 

173 self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets2)) 

174 self.assertCountEqual( 

175 [self.comparableRef(ref) for ref in datasets1], 

176 [self.comparableRef(ref) for ref in datasets2], 

177 ) 

178 

179 def testImportTwice(self): 

180 """Test exporting dimension records and datasets from a repo and then 

181 importing them all back in again twice. 

182 """ 

183 # Import data to play with. 

184 butler1 = self.makeButler(writeable=True) 

185 butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

186 butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile)) 

187 with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as file: 

188 # Export all datasets. 

189 with butler1.export(filename=file.name) as exporter: 

190 exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...)) 

191 butler2 = self.makeButler(writeable=True) 

192 # Import it once. 

193 butler2.import_(filename=file.name) 

194 # Import it again 

195 butler2.import_(filename=file.name) 

196 datasets1 = list(butler1.registry.queryDatasets(..., collections=...)) 

197 datasets2 = list(butler2.registry.queryDatasets(..., collections=...)) 

198 self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets1)) 

199 self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets2)) 

200 self.assertCountEqual( 

201 [self.comparableRef(ref) for ref in datasets1], 

202 [self.comparableRef(ref) for ref in datasets2], 

203 ) 

204 

    def testCollectionTransfers(self):
        """Test exporting and then importing collections of various types."""
        # Populate a registry with some datasets.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        registry1 = butler1.registry
        # Add some more collections: one of each type, with chain1 nesting
        # chain2 so chained-collection ordering is exercised on import.
        registry1.registerRun("run1")
        registry1.registerCollection("tag1", CollectionType.TAGGED)
        registry1.registerCollection("calibration1", CollectionType.CALIBRATION)
        registry1.registerCollection("chain1", CollectionType.CHAINED)
        registry1.registerCollection("chain2", CollectionType.CHAINED)
        registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
        registry1.setCollectionChain("chain2", ["calibration1", "run1"])
        # Associate some datasets into the TAGGED and CALIBRATION collections.
        flats1 = list(registry1.queryDatasets("flat", collections=...))
        registry1.associate("tag1", flats1)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias1a = registry1.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g")
        bias2a = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Certify with a mix of closed, open-ended, and empty timespans so all
        # three forms are exercised by the export/import round trip.
        registry1.certify("calibration1", [bias2a, bias3a], Timespan(t1, t2))
        registry1.certify("calibration1", [bias2b], Timespan(t2, None))
        registry1.certify("calibration1", [bias3b], Timespan(t2, t3))
        registry1.certify("calibration1", [bias1a], Timespan.makeEmpty())

        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all collections, and some datasets.
            with butler1.export(filename=file.name) as exporter:
                # Sort results to put chain1 before chain2, which is
                # intentionally not topological order.
                for collection in sorted(registry1.queryCollections()):
                    exporter.saveCollection(collection)
                exporter.saveDatasets(flats1)
                exporter.saveDatasets([bias1a, bias2a, bias2b, bias3a, bias3b])
            # Import them into a new registry.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
            registry2 = butler2.registry
            # Check that it all round-tripped, starting with the collections
            # themselves.
            self.assertIs(registry2.getCollectionType("run1"), CollectionType.RUN)
            self.assertIs(registry2.getCollectionType("tag1"), CollectionType.TAGGED)
            self.assertIs(registry2.getCollectionType("calibration1"), CollectionType.CALIBRATION)
            self.assertIs(registry2.getCollectionType("chain1"), CollectionType.CHAINED)
            self.assertIs(registry2.getCollectionType("chain2"), CollectionType.CHAINED)
            self.assertEqual(
                list(registry2.getCollectionChain("chain1")),
                ["tag1", "run1", "chain2"],
            )
            self.assertEqual(
                list(registry2.getCollectionChain("chain2")),
                ["calibration1", "run1"],
            )
            # Check that tag collection contents are the same.
            self.maxDiff = None
            self.assertCountEqual(
                [self.comparableRef(ref) for ref in registry1.queryDatasets(..., collections="tag1")],
                [self.comparableRef(ref) for ref in registry2.queryDatasets(..., collections="tag1")],
            )
            # Check that calibration collection contents are the same.
            self.assertCountEqual(
                [
                    (self.comparableRef(assoc.ref), assoc.timespan)
                    for assoc in registry1.queryDatasetAssociations("bias", collections="calibration1")
                ],
                [
                    (self.comparableRef(assoc.ref), assoc.timespan)
                    for assoc in registry2.queryDatasetAssociations("bias", collections="calibration1")
                ],
            )

281 

    def testButlerGet(self):
        """Test that butler.get can work with different variants."""
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))

        # Find the DatasetRef for a flat
        coll = "imported_g"
        flat2g = butler.find_dataset(
            "flat", instrument="Cam1", full_name="Ab", physical_filter="Cam1-G", collections=coll
        )

        # Create a numpy integer to check that works fine
        detector_np = np.int64(2) if np else 2

        # Try to get it using different variations of dataId + keyword
        # arguments.  Each entry is a (dataId, kwargs) pair that must resolve
        # to the same flat found above.
        # Note that instrument.class_name does not work
        variants = (
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            (None, {"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}),
            ({"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": 2}, {"physical_filter": "Cam1-G"}),
            ({"detector.full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"full_name": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"detector": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b", "raft": "A"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b"}, {"raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"name_in_raft": "b", "raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (
                {"detector.name_in_raft": "b", "detector.raft": "A"},
                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
            ),
            (
                {
                    "detector.name_in_raft": "b",
                    "detector.raft": "A",
                    "instrument": "Cam1",
                    "physical_filter": "Cam1-G",
                },
                {},
            ),
            # Duplicate (but valid) information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "A", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "A", "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
        )

        for dataId, kwds in variants:
            try:
                flat_id, _ = butler.get("flat", dataId=dataId, collections=coll, **kwds)
            except Exception as e:
                # Attach the failing variant to the exception so the test
                # report identifies which combination broke (requires 3.11+).
                e.add_note(f"dataId={dataId}, kwds={kwds}")
                raise
            self.assertEqual(flat_id, flat2g.id, msg=f"DataId: {dataId}, kwds: {kwds}")

        # Check that bad combinations raise.
        variants = (
            # Inconsistent detector information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "B", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
            ({"detector": 12}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
            ({"raft": "B"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "B"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
            # Under-specified.
            ({"raft": "B"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            # Spurious kwargs.
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G", "x": "y"}),
            ({"x": "y"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
        )
        for dataId, kwds in variants:
            with self.assertRaises((ValueError, LookupError)):
                butler.get("flat", dataId=dataId, collections=coll, **kwds)

359 

    def testGetCalibration(self):
        """Test that `Butler.get` can be used to fetch from
        `~CollectionType.CALIBRATION` collections if the data ID includes
        extra dimensions with temporal information.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Certify some biases into a CALIBRATION collection.
        registry = butler.registry
        registry.registerCollection("calibs", CollectionType.CALIBRATION)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # The *a biases are valid in [t1, t2); bias2b from t2 onward (open
        # ended); bias3b only within [t2, t3).
        registry.certify("calibs", [bias2a, bias3a], Timespan(t1, t2))
        registry.certify("calibs", [bias2b], Timespan(t2, None))
        registry.certify("calibs", [bias3b], Timespan(t2, t3))
        # Insert some exposure dimension data: exposure 3 overlaps the first
        # validity range, exposure 4 the second.
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "group": "three"},
            {"instrument": "Cam1", "group": "four"},
        )
        registry.insertDimensionData(
            "day_obs",
            {"instrument": "Cam1", "id": 20211114},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 3,
                "obs_id": "three",
                "timespan": Timespan(t1, t2),
                "physical_filter": "Cam1-G",
                "group": "three",
                "day_obs": 20211114,
                "seq_num": 55,
            },
            {
                "instrument": "Cam1",
                "id": 4,
                "obs_id": "four",
                "timespan": Timespan(t2, t3),
                "physical_filter": "Cam1-G",
                "group": "four",
                "day_obs": 20211114,
                "seq_num": 42,
            },
        )
        # Get some biases from raw-like data IDs.
        bias2a_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 3, "detector": 2}, collections="calibs"
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 4, "detector": 3}, collections="calibs"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Get using the kwarg form
        bias3b_id, _ = butler.get("bias", instrument="Cam1", exposure=4, detector=3, collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Do it again but using the record information
        bias2a_id, _ = butler.get(
            "bias",
            {"instrument": "Cam1", "exposure.obs_id": "three", "detector.full_name": "Ab"},
            collections="calibs",
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias",
            {"exposure.obs_id": "four", "detector.full_name": "Ba"},
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary.
        bias3b_id, _ = butler.get(
            "bias", {"exposure": "four", "detector": "Ba"}, collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary and do it in the keyword arguments.
        bias3b_id, _ = butler.get(
            "bias", exposure="four", detector="Ba", collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Now with implied record columns
        bias3b_id, _ = butler.get(
            "bias",
            day_obs=20211114,
            seq_num=42,
            raft="B",
            name_in_raft="a",
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Allow a fully-specified dataId and unnecessary extra information
        # that comes from the record.
        bias3b_id, _ = butler.get(
            "bias",
            dataId=dict(
                exposure=4,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                instrument="Cam1",
            ),
            collections="calibs",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Extra but inconsistent record values are a problem.
        # (seq_num=42 belongs to exposure 4, not exposure 3.)
        with self.assertRaises(ValueError):
            bias3b_id, _ = butler.get(
                "bias",
                exposure=3,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                collections="calibs",
                instrument="Cam1",
            )

        # Ensure that spurious kwargs cause an exception.
        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                {"exposure.obs_id": "four", "immediate": True, "detector.full_name": "Ba"},
                collections="calibs",
                instrument="Cam1",
            )

        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                day_obs=20211114,
                seq_num=42,
                raft="B",
                name_in_raft="a",
                collections="calibs",
                instrument="Cam1",
                immediate=True,
            )

517 

518 def testRegistryDefaults(self): 

519 """Test that we can default the collections and some data ID keys when 

520 constructing a butler. 

521 

522 Many tests that use default run already exist in ``test_butler.py``, so 

523 that isn't tested here. And while most of this functionality is 

524 implemented in `Registry`, we test it here instead of 

525 ``daf/butler/tests/registry.py`` because it shouldn't depend on the 

526 database backend at all. 

527 """ 

528 butler = self.makeButler(writeable=True) 

529 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

530 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile)) 

531 # Need to actually set defaults later, not at construction, because 

532 # we need to import the instrument before we can use it as a default. 

533 # Don't set a default instrument value for data IDs, because 'Cam1' 

534 # should be inferred by virtue of that being the only value in the 

535 # input collections. 

536 butler.registry.defaults = RegistryDefaults(collections=["imported_g"]) 

537 # Use findDataset without collections or instrument. 

538 ref = butler.find_dataset("flat", detector=2, physical_filter="Cam1-G") 

539 # Do the same with Butler.get; this should ultimately invoke a lot of 

540 # the same code, so it's a bit circular, but mostly we're checking that 

541 # it works at all. 

542 dataset_id, _ = butler.get("flat", detector=2, physical_filter="Cam1-G") 

543 self.assertEqual(ref.id, dataset_id) 

544 # Query for datasets. Test defaulting the data ID in both kwargs and 

545 # in the WHERE expression. 

546 queried_refs_1 = set(butler.registry.queryDatasets("flat", detector=2, physical_filter="Cam1-G")) 

547 self.assertEqual({ref}, queried_refs_1) 

548 queried_refs_2 = set( 

549 butler.registry.queryDatasets("flat", where="detector=2 AND physical_filter='Cam1-G'") 

550 ) 

551 self.assertEqual({ref}, queried_refs_2) 

552 # Query for data IDs with a dataset constraint. 

553 queried_data_ids = set( 

554 butler.registry.queryDataIds( 

555 {"instrument", "detector", "physical_filter"}, 

556 datasets={"flat"}, 

557 detector=2, 

558 physical_filter="Cam1-G", 

559 ) 

560 ) 

561 self.assertEqual({ref.dataId}, queried_data_ids) 

562 # Add another instrument to the repo, and a dataset that uses it to 

563 # the `imported_g` collection. 

564 butler.registry.insertDimensionData("instrument", {"name": "Cam2"}) 

565 camera = DatasetType( 

566 "camera", 

567 dimensions=butler.dimensions["instrument"].graph, 

568 storageClass="Camera", 

569 ) 

570 butler.registry.registerDatasetType(camera) 

571 butler.registry.insertDatasets(camera, [{"instrument": "Cam2"}], run="imported_g") 

572 # Initialize a new butler with `imported_g` as its default run. 

573 # This should not have a default instrument, because there are two. 

574 # Pass run instead of collections; this should set both. 

575 butler2 = Butler.from_config(butler=butler, run="imported_g") 

576 self.assertEqual(list(butler2.registry.defaults.collections), ["imported_g"]) 

577 self.assertEqual(butler2.registry.defaults.run, "imported_g") 

578 self.assertFalse(butler2.registry.defaults.dataId) 

579 # Initialize a new butler with an instrument default explicitly given. 

580 # Set collections instead of run, which should then be None. 

581 butler3 = Butler.from_config(butler=butler, collections=["imported_g"], instrument="Cam2") 

582 self.assertEqual(list(butler3.registry.defaults.collections), ["imported_g"]) 

583 self.assertIsNone(butler3.registry.defaults.run, None) 

584 self.assertEqual(butler3.registry.defaults.dataId.required, {"instrument": "Cam2"}) 

585 

586 # Check that repr() does not fail. 

587 defaults = RegistryDefaults(collections=["imported_g"], run="test") 

588 r = repr(defaults) 

589 self.assertIn("collections=('imported_g',)", r) 

590 self.assertIn("run='test'", r) 

591 

592 defaults = RegistryDefaults(run="test", instrument="DummyCam", skypix="pix") 

593 r = repr(defaults) 

594 self.assertIn("skypix='pix'", r) 

595 self.assertIn("instrument='DummyCam'", r) 

596 

597 def testJson(self): 

598 """Test JSON serialization mediated by registry.""" 

599 butler = self.makeButler(writeable=True) 

600 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

601 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile)) 

602 # Need to actually set defaults later, not at construction, because 

603 # we need to import the instrument before we can use it as a default. 

604 # Don't set a default instrument value for data IDs, because 'Cam1' 

605 # should be inferred by virtue of that being the only value in the 

606 # input collections. 

607 butler.registry.defaults = RegistryDefaults(collections=["imported_g"]) 

608 # Use findDataset without collections or instrument. 

609 ref = butler.find_dataset("flat", detector=2, physical_filter="Cam1-G") 

610 

611 # Transform the ref and dataset type to and from JSON 

612 # and check that it can be reconstructed properly 

613 

614 # Do it with the ref and a component ref in minimal and standard form 

615 compRef = ref.makeComponentRef("wcs") 

616 

617 for test_item in (ref, ref.datasetType, compRef, compRef.datasetType): 

618 for minimal in (False, True): 

619 json_str = test_item.to_json(minimal=minimal) 

620 from_json = type(test_item).from_json(json_str, registry=butler.registry) 

621 self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using registry") 

622 

623 # for minimal=False case also do a test without registry 

624 if not minimal: 

625 from_json = type(test_item).from_json(json_str, universe=butler.dimensions) 

626 self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using universe") 

627 

    def test_populated_by(self):
        """Test that dimension records can find other records."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        # Restrict to elements that are stored in their own table.
        elements = frozenset(element for element in butler.dimensions.elements if element.has_own_table)

        # Get a visit-based dataId.
        data_ids = set(butler.registry.queryDataIds("visit", visit=1232, instrument="HSC"))

        # Request all the records related to it.
        # NOTE(review): ``butler`` is passed explicitly even though the method
        # is already bound to it — presumably the private helper takes a
        # butler as its first positional argument; confirm against its
        # signature.
        records = butler._extract_all_dimension_records_from_data_ids(butler, data_ids, elements)

        # Records populated by the visit (its regions, system memberships,
        # and systems) must all come back.
        self.assertIn(butler.dimensions["visit_detector_region"], records, f"Keys: {records.keys()}")
        self.assertIn(butler.dimensions["visit_system_membership"], records)
        self.assertIn(butler.dimensions["visit_system"], records)

644 

    def testJsonDimensionRecordsAndHtmlRepresentation(self):
        """Round-trip dimension records through JSON (both minimal and
        standard forms) and check their HTML rendering does not fail.
        """
        # Dimension Records
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        for dimension in ("detector", "visit", "exposure", "day_obs", "group"):
            records = butler.registry.queryDimensionRecords(dimension, instrument="HSC")
            for r in records:
                for minimal in (True, False):
                    json_str = r.to_json(minimal=minimal)
                    r_json = type(r).from_json(json_str, registry=butler.registry)
                    self.assertEqual(r_json, r)
                    # check with direct method
                    simple = r.to_simple()
                    fromDirect = type(simple).direct(**json.loads(json_str))
                    self.assertEqual(simple, fromDirect)
                    # Also check equality of each of the components as dicts
                    self.assertEqual(r_json.toDict(), r.toDict())

                # check the html representation of records
                r_html = r._repr_html_()
                self.assertTrue(isinstance(r_html, str))
                self.assertIn(dimension, r_html)

668 

669 def test_dimension_records_import(self): 

670 # Dimension Records 

671 butler = self.makeButler(writeable=True) 

672 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset-v0.yaml")) 

673 

674 # Count records and assume this means it worked. 

675 dimensions = ( 

676 ("day_obs", 15), 

677 ("group", 1), 

678 ("exposure", 1), 

679 ("visit", 160), 

680 ("detector", 111), 

681 ("visit_system_membership", 160), 

682 ) 

683 for dimension, count in dimensions: 

684 records = list(butler.registry.queryDimensionRecords(dimension, instrument="HSC")) 

685 self.assertEqual(len(records), count) 

686 

687 def testWildcardQueries(self): 

688 """Test that different collection type queries work.""" 

689 # Import data to play with. 

690 butler = self.makeButler(writeable=True) 

691 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

692 

693 # Create some collections 

694 created = {"collection", "u/user/test", "coll3"} 

695 for collection in created: 

696 butler.registry.registerCollection(collection, type=CollectionType.RUN) 

697 

698 collections = butler.registry.queryCollections() 

699 self.assertEqual(set(collections), created) 

700 

701 expressions = ( 

702 ("collection", {"collection"}), 

703 (..., created), 

704 ("*", created), 

705 (("collection", "*"), created), 

706 ("u/*", {"u/user/test"}), 

707 (re.compile("u.*"), {"u/user/test"}), 

708 (re.compile(".*oll.*"), {"collection", "coll3"}), 

709 ("*oll*", {"collection", "coll3"}), 

710 ((re.compile(r".*\d$"), "u/user/test"), {"coll3", "u/user/test"}), 

711 ("*[0-9]", {"coll3"}), 

712 ) 

713 for expression, expected in expressions: 

714 result = butler.registry.queryCollections(expression) 

715 self.assertEqual(set(result), expected) 

716 

717 def test_skypix_templates(self): 

718 """Test that skypix templates can work.""" 

719 # Dimension Records 

720 butler = self.makeButler(writeable=True) 

721 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml")) 

722 

723 sc = StorageClass("null") 

724 dataset_type = DatasetType("warp", ("visit", "htm7"), sc, universe=butler.dimensions) 

725 dataId = butler.registry.expandDataId( 

726 DataCoordinate.standardize( 

727 dict(visit=27136, htm7=12345, instrument="HSC"), universe=butler.dimensions 

728 ) 

729 ) 

730 ref = DatasetRef(dataset_type, dataId, run="test") 

731 self.assertTrue(ref.dataId.hasRecords()) 

732 

733 tmplstr = "{run}/{datasetType}/{visit.name}_{skypix}_{htm7}_{skypix.id}_{htm7.id}" 

734 file_template = FileTemplate(tmplstr) 

735 path = file_template.format(ref) 

736 self.assertEqual(path, "test/warp/HSCA02713600_12345_12345_12345_12345") 

737 

738 

# Allow running this test module directly (e.g. ``python test_simpleButler.py``).
if __name__ == "__main__":
    unittest.main()