Coverage for tests/test_simpleButler.py: 10%

276 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-13 10:57 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

27 

28from __future__ import annotations 

29 

30import json 

31import os 

32import re 

33import tempfile 

34import unittest 

35from typing import Any 

36 

37try: 

38 import numpy as np 

39except ImportError: 

40 np = None 

41 

42import astropy.time 

43from lsst.daf.butler import ( 

44 Butler, 

45 ButlerConfig, 

46 CollectionType, 

47 DataCoordinate, 

48 DatasetId, 

49 DatasetRef, 

50 DatasetType, 

51 StorageClass, 

52 Timespan, 

53) 

54from lsst.daf.butler.datastore.file_templates import FileTemplate 

55from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults, _RegistryFactory 

56from lsst.daf.butler.tests import DatastoreMock 

57from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir 

58 

59TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

60 

61 

class SimpleButlerTestCase(unittest.TestCase):
    """Tests for butler (including import/export functionality) that should not
    depend on the Registry Database backend or Datastore implementation, and
    can instead utilize an in-memory SQLite Registry and a mocked Datastore.
    """

    # Fully-qualified name of the dataset record storage manager under test.
    datasetsManager = (
        "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
    )
    # Export file (in tests/data/registry) providing the datasets to import.
    datasetsImportFile = "datasets.yaml"

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.root)

    def makeButler(self, **kwargs: Any) -> Butler:
        """Return new Butler instance on each call."""
        config = ButlerConfig()

        # make separate temporary directory for registry of this instance
        tmpdir = tempfile.mkdtemp(dir=self.root)
        config["registry", "db"] = f"sqlite:///{tmpdir}/gen3.sqlite3"
        config["registry", "managers", "datasets"] = self.datasetsManager
        config["root"] = self.root

        # have to make a registry first
        registryConfig = RegistryConfig(config.get("registry"))
        _RegistryFactory(registryConfig).create_from_config()

        butler = Butler.from_config(config, **kwargs)
        DatastoreMock.apply(butler)
        return butler

    def comparableRef(self, ref: DatasetRef) -> DatasetRef:
        """Return a DatasetRef that can be compared to a DatasetRef from
        other repository.

        For repositories that do not support round-trip of ID values this
        method returns unresolved DatasetRef, for round-trip-safe repos it
        returns unchanged ref.
        """
        return ref

    def testReadBackwardsCompatibility(self):
        """Test that we can read an export file written by a previous version
        and commit to the daf_butler git repo.

        Notes
        -----
        At present this export file includes only dimension data, not datasets,
        which greatly limits the usefulness of this test. We should address
        this at some point, but I think it's best to wait for the changes to
        the export format required for CALIBRATION collections to land.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Spot-check a few things, but the most important test is just that
        # the above does not raise.
        self.assertGreaterEqual(
            {record.id for record in butler.registry.queryDimensionRecords("detector", instrument="HSC")},
            set(range(104)),  # should have all science CCDs; may have some focus ones.
        )
        self.assertGreaterEqual(
            {
                (record.id, record.physical_filter)
                for record in butler.registry.queryDimensionRecords("visit", instrument="HSC")
            },
            {
                (27136, "HSC-Z"),
                (11694, "HSC-G"),
                (23910, "HSC-R"),
                (11720, "HSC-Y"),
                (23900, "HSC-R"),
                (22646, "HSC-Y"),
                (1248, "HSC-I"),
                (19680, "HSC-I"),
                (1240, "HSC-I"),
                (424, "HSC-Y"),
                (19658, "HSC-I"),
                (344, "HSC-Y"),
                (1218, "HSC-R"),
                (1190, "HSC-Z"),
                (23718, "HSC-R"),
                (11700, "HSC-G"),
                (26036, "HSC-G"),
                (23872, "HSC-R"),
                (1170, "HSC-Z"),
                (1876, "HSC-Y"),
            },
        )

    def testDatasetTransfers(self):
        """Test exporting all datasets from a repo and then importing them all
        back in again.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
            # Import it all again.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
        datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets1))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets2))
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in datasets1],
            [self.comparableRef(ref) for ref in datasets2],
        )

    def testImportTwice(self):
        """Test exporting dimension records and datasets from a repo and then
        importing them all back in again twice.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # delete=False so the file survives the with-block; the imports below
        # reopen it by name after it has been closed.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
        butler2 = self.makeButler(writeable=True)
        # Import it once.
        butler2.import_(filename=file.name)
        # Import it again
        butler2.import_(filename=file.name)
        datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
        datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets1))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets2))
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in datasets1],
            [self.comparableRef(ref) for ref in datasets2],
        )

    def testCollectionTransfers(self):
        """Test exporting and then importing collections of various types."""
        # Populate a registry with some datasets.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        registry1 = butler1.registry
        # Add some more collections.
        registry1.registerRun("run1")
        registry1.registerCollection("tag1", CollectionType.TAGGED)
        registry1.registerCollection("calibration1", CollectionType.CALIBRATION)
        registry1.registerCollection("chain1", CollectionType.CHAINED)
        registry1.registerCollection("chain2", CollectionType.CHAINED)
        registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
        registry1.setCollectionChain("chain2", ["calibration1", "run1"])
        # Associate some datasets into the TAGGED and CALIBRATION collections.
        flats1 = list(registry1.queryDatasets("flat", collections=...))
        registry1.associate("tag1", flats1)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias1a = registry1.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g")
        bias2a = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry1.certify("calibration1", [bias2a, bias3a], Timespan(t1, t2))
        registry1.certify("calibration1", [bias2b], Timespan(t2, None))
        registry1.certify("calibration1", [bias3b], Timespan(t2, t3))
        registry1.certify("calibration1", [bias1a], Timespan.makeEmpty())

        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all collections, and some datasets.
            with butler1.export(filename=file.name) as exporter:
                # Sort results to put chain1 before chain2, which is
                # intentionally not topological order.
                for collection in sorted(registry1.queryCollections()):
                    exporter.saveCollection(collection)
                exporter.saveDatasets(flats1)
                exporter.saveDatasets([bias1a, bias2a, bias2b, bias3a, bias3b])
            # Import them into a new registry.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        registry2 = butler2.registry
        # Check that it all round-tripped, starting with the collections
        # themselves.
        self.assertIs(registry2.getCollectionType("run1"), CollectionType.RUN)
        self.assertIs(registry2.getCollectionType("tag1"), CollectionType.TAGGED)
        self.assertIs(registry2.getCollectionType("calibration1"), CollectionType.CALIBRATION)
        self.assertIs(registry2.getCollectionType("chain1"), CollectionType.CHAINED)
        self.assertIs(registry2.getCollectionType("chain2"), CollectionType.CHAINED)
        self.assertEqual(
            list(registry2.getCollectionChain("chain1")),
            ["tag1", "run1", "chain2"],
        )
        self.assertEqual(
            list(registry2.getCollectionChain("chain2")),
            ["calibration1", "run1"],
        )
        # Check that tag collection contents are the same.
        self.maxDiff = None
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in registry1.queryDatasets(..., collections="tag1")],
            [self.comparableRef(ref) for ref in registry2.queryDatasets(..., collections="tag1")],
        )
        # Check that calibration collection contents are the same.
        self.assertCountEqual(
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry1.queryDatasetAssociations("bias", collections="calibration1")
            ],
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry2.queryDatasetAssociations("bias", collections="calibration1")
            ],
        )

    def testButlerGet(self):
        """Test that butler.get can work with different variants."""
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))

        # Find the DatasetRef for a flat
        coll = "imported_g"
        flat2g = butler.find_dataset(
            "flat", instrument="Cam1", full_name="Ab", physical_filter="Cam1-G", collections=coll
        )

        # Create a numpy integer to check that works fine
        detector_np = np.int64(2) if np else 2

        # Try to get it using different variations of dataId + keyword
        # arguments
        # Note that instrument.class_name does not work
        variants = (
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            (None, {"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}),
            ({"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": 2}, {"physical_filter": "Cam1-G"}),
            ({"detector.full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"full_name": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"detector": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b", "raft": "A"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b"}, {"raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"name_in_raft": "b", "raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (
                {"detector.name_in_raft": "b", "detector.raft": "A"},
                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
            ),
            (
                {
                    "detector.name_in_raft": "b",
                    "detector.raft": "A",
                    "instrument": "Cam1",
                    "physical_filter": "Cam1-G",
                },
                {},
            ),
            # Duplicate (but valid) information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "A", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "A", "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
        )

        for dataId, kwds in variants:
            try:
                flat_id, _ = butler.get("flat", dataId=dataId, collections=coll, **kwds)
            except Exception as e:
                e.add_note(f"dataId={dataId}, kwds={kwds}")
                raise
            self.assertEqual(flat_id, flat2g.id, msg=f"DataId: {dataId}, kwds: {kwds}")

        # Check that bad combinations raise.
        variants = (
            # Inconsistent detector information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "B", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
            ({"detector": 12}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
            ({"raft": "B"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "B"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
            # Under-specified.
            ({"raft": "B"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            # Spurious kwargs.
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G", "x": "y"}),
            ({"x": "y"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
        )
        for dataId, kwds in variants:
            with self.assertRaises((ValueError, LookupError)):
                butler.get("flat", dataId=dataId, collections=coll, **kwds)

    def testGetCalibration(self):
        """Test that `Butler.get` can be used to fetch from
        `~CollectionType.CALIBRATION` collections if the data ID includes
        extra dimensions with temporal information.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Certify some biases into a CALIBRATION collection.
        registry = butler.registry
        registry.registerCollection("calibs", CollectionType.CALIBRATION)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry.certify("calibs", [bias2a, bias3a], Timespan(t1, t2))
        registry.certify("calibs", [bias2b], Timespan(t2, None))
        registry.certify("calibs", [bias3b], Timespan(t2, t3))
        # Insert some exposure dimension data.
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 3,
                "obs_id": "three",
                "timespan": Timespan(t1, t2),
                "physical_filter": "Cam1-G",
                "day_obs": 20201114,
                "seq_num": 55,
            },
            {
                "instrument": "Cam1",
                "id": 4,
                "obs_id": "four",
                "timespan": Timespan(t2, t3),
                "physical_filter": "Cam1-G",
                "day_obs": 20211114,
                "seq_num": 42,
            },
        )
        # Get some biases from raw-like data IDs.
        bias2a_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 3, "detector": 2}, collections="calibs"
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 4, "detector": 3}, collections="calibs"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Get using the kwarg form
        bias3b_id, _ = butler.get("bias", instrument="Cam1", exposure=4, detector=3, collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Do it again but using the record information
        bias2a_id, _ = butler.get(
            "bias",
            {"instrument": "Cam1", "exposure.obs_id": "three", "detector.full_name": "Ab"},
            collections="calibs",
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias",
            {"exposure.obs_id": "four", "detector.full_name": "Ba"},
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary.
        bias3b_id, _ = butler.get(
            "bias", {"exposure": "four", "detector": "Ba"}, collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary and do it in the keyword arguments.
        bias3b_id, _ = butler.get(
            "bias", exposure="four", detector="Ba", collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Now with implied record columns
        bias3b_id, _ = butler.get(
            "bias",
            day_obs=20211114,
            seq_num=42,
            raft="B",
            name_in_raft="a",
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Allow a fully-specified dataId and unnecessary extra information
        # that comes from the record.
        bias3b_id, _ = butler.get(
            "bias",
            dataId=dict(
                exposure=4,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                instrument="Cam1",
            ),
            collections="calibs",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Extra but inconsistent record values are a problem.
        with self.assertRaises(ValueError):
            bias3b_id, _ = butler.get(
                "bias",
                exposure=3,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                collections="calibs",
                instrument="Cam1",
            )

        # Ensure that spurious kwargs cause an exception.
        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                {"exposure.obs_id": "four", "immediate": True, "detector.full_name": "Ba"},
                collections="calibs",
                instrument="Cam1",
            )

        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                day_obs=20211114,
                seq_num=42,
                raft="B",
                name_in_raft="a",
                collections="calibs",
                instrument="Cam1",
                immediate=True,
            )

    def testRegistryDefaults(self):
        """Test that we can default the collections and some data ID keys when
        constructing a butler.

        Many tests that use default run already exist in ``test_butler.py``, so
        that isn't tested here. And while most of this functionality is
        implemented in `Registry`, we test it here instead of
        ``daf/butler/tests/registry.py`` because it shouldn't depend on the
        database backend at all.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.find_dataset("flat", detector=2, physical_filter="Cam1-G")
        # Do the same with Butler.get; this should ultimately invoke a lot of
        # the same code, so it's a bit circular, but mostly we're checking that
        # it works at all.
        dataset_id, _ = butler.get("flat", detector=2, physical_filter="Cam1-G")
        self.assertEqual(ref.id, dataset_id)
        # Query for datasets. Test defaulting the data ID in both kwargs and
        # in the WHERE expression.
        queried_refs_1 = set(butler.registry.queryDatasets("flat", detector=2, physical_filter="Cam1-G"))
        self.assertEqual({ref}, queried_refs_1)
        queried_refs_2 = set(
            butler.registry.queryDatasets("flat", where="detector=2 AND physical_filter='Cam1-G'")
        )
        self.assertEqual({ref}, queried_refs_2)
        # Query for data IDs with a dataset constraint.
        queried_data_ids = set(
            butler.registry.queryDataIds(
                {"instrument", "detector", "physical_filter"},
                datasets={"flat"},
                detector=2,
                physical_filter="Cam1-G",
            )
        )
        self.assertEqual({ref.dataId}, queried_data_ids)
        # Add another instrument to the repo, and a dataset that uses it to
        # the `imported_g` collection.
        butler.registry.insertDimensionData("instrument", {"name": "Cam2"})
        camera = DatasetType(
            "camera",
            dimensions=butler.dimensions["instrument"].graph,
            storageClass="Camera",
        )
        butler.registry.registerDatasetType(camera)
        butler.registry.insertDatasets(camera, [{"instrument": "Cam2"}], run="imported_g")
        # Initialize a new butler with `imported_g` as its default run.
        # This should not have a default instrument, because there are two.
        # Pass run instead of collections; this should set both.
        butler2 = Butler.from_config(butler=butler, run="imported_g")
        self.assertEqual(list(butler2.registry.defaults.collections), ["imported_g"])
        self.assertEqual(butler2.registry.defaults.run, "imported_g")
        self.assertFalse(butler2.registry.defaults.dataId)
        # Initialize a new butler with an instrument default explicitly given.
        # Set collections instead of run, which should then be None.
        butler3 = Butler.from_config(butler=butler, collections=["imported_g"], instrument="Cam2")
        self.assertEqual(list(butler3.registry.defaults.collections), ["imported_g"])
        self.assertIsNone(butler3.registry.defaults.run, None)
        self.assertEqual(butler3.registry.defaults.dataId.required, {"instrument": "Cam2"})

        # Check that repr() does not fail.
        defaults = RegistryDefaults(collections=["imported_g"], run="test")
        r = repr(defaults)
        self.assertIn("collections=('imported_g',)", r)
        self.assertIn("run='test'", r)

        defaults = RegistryDefaults(run="test", instrument="DummyCam", skypix="pix")
        r = repr(defaults)
        self.assertIn("skypix='pix'", r)
        self.assertIn("instrument='DummyCam'", r)

    def testJson(self):
        """Test JSON serialization mediated by registry."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.find_dataset("flat", detector=2, physical_filter="Cam1-G")

        # Transform the ref and dataset type to and from JSON
        # and check that it can be reconstructed properly

        # Do it with the ref and a component ref in minimal and standard form
        compRef = ref.makeComponentRef("wcs")

        for test_item in (ref, ref.datasetType, compRef, compRef.datasetType):
            for minimal in (False, True):
                json_str = test_item.to_json(minimal=minimal)
                from_json = type(test_item).from_json(json_str, registry=butler.registry)
                self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using registry")

                # for minimal=False case also do a test without registry
                if not minimal:
                    from_json = type(test_item).from_json(json_str, universe=butler.dimensions)
                    self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using universe")

    def test_populated_by(self):
        """Test that dimension records can find other records."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        elements = frozenset(element for element in butler.dimensions.elements if element.has_own_table)

        # Get a visit-based dataId.
        data_ids = set(butler.registry.queryDataIds("visit", visit=1232, instrument="HSC"))

        # Request all the records related to it.
        records = butler._extract_all_dimension_records_from_data_ids(butler, data_ids, elements)

        self.assertIn(butler.dimensions["visit_detector_region"], records, f"Keys: {records.keys()}")
        self.assertIn(butler.dimensions["visit_system_membership"], records)
        self.assertIn(butler.dimensions["visit_system"], records)

    def testJsonDimensionRecordsAndHtmlRepresentation(self):
        # Dimension Records
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        for dimension in ("detector", "visit", "exposure"):
            records = butler.registry.queryDimensionRecords(dimension, instrument="HSC")
            for r in records:
                for minimal in (True, False):
                    json_str = r.to_json(minimal=minimal)
                    r_json = type(r).from_json(json_str, registry=butler.registry)
                    self.assertEqual(r_json, r)
                    # check with direct method
                    simple = r.to_simple()
                    fromDirect = type(simple).direct(**json.loads(json_str))
                    self.assertEqual(simple, fromDirect)
                    # Also check equality of each of the components as dicts
                    self.assertEqual(r_json.toDict(), r.toDict())

                # check the html representation of records
                r_html = r._repr_html_()
                self.assertTrue(isinstance(r_html, str))
                self.assertIn(dimension, r_html)

    def testWildcardQueries(self):
        """Test that different collection type queries work."""
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Create some collections
        created = {"collection", "u/user/test", "coll3"}
        for collection in created:
            butler.registry.registerCollection(collection, type=CollectionType.RUN)

        collections = butler.registry.queryCollections()
        self.assertEqual(set(collections), created)

        expressions = (
            ("collection", {"collection"}),
            (..., created),
            ("*", created),
            (("collection", "*"), created),
            ("u/*", {"u/user/test"}),
            (re.compile("u.*"), {"u/user/test"}),
            (re.compile(".*oll.*"), {"collection", "coll3"}),
            ("*oll*", {"collection", "coll3"}),
            ((re.compile(r".*\d$"), "u/user/test"), {"coll3", "u/user/test"}),
            ("*[0-9]", {"coll3"}),
        )
        for expression, expected in expressions:
            result = butler.registry.queryCollections(expression)
            self.assertEqual(set(result), expected)

    def test_skypix_templates(self):
        """Test that skypix templates can work."""
        # Dimension Records
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        sc = StorageClass("null")
        dataset_type = DatasetType("warp", ("visit", "htm7"), sc, universe=butler.dimensions)
        dataId = butler.registry.expandDataId(
            DataCoordinate.standardize(
                dict(visit=27136, htm7=12345, instrument="HSC"), universe=butler.dimensions
            )
        )
        ref = DatasetRef(dataset_type, dataId, run="test")
        self.assertTrue(ref.dataId.hasRecords())

        tmplstr = "{run}/{datasetType}/{visit.name}_{skypix}_{htm7}_{skypix.id}_{htm7.id}"
        file_template = FileTemplate(tmplstr)
        path = file_template.format(ref)
        self.assertEqual(path, "test/warp/HSCA02713600_12345_12345_12345_12345")

708 

709 

if __name__ == "__main__":
    unittest.main()