Coverage for tests/test_simpleButler.py: 14%

289 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-07-14 22:50 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import json 

25import os 

26import re 

27import tempfile 

28import unittest 

29import uuid 

30from typing import Any 

31 

32try: 

33 import numpy as np 

34except ImportError: 

35 np = None 

36 

37import astropy.time 

38from lsst.daf.butler import Butler, ButlerConfig, CollectionType, DatasetRef, DatasetType, Registry, Timespan 

39from lsst.daf.butler.registry import ConflictingDefinitionError, RegistryConfig, RegistryDefaults 

40from lsst.daf.butler.tests import DatastoreMock 

41from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir 

42 

43TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

44 

45 

class SimpleButlerTestCase(unittest.TestCase):
    """Tests for butler (including import/export functionality) that should not
    depend on the Registry Database backend or Datastore implementation, and
    can instead utilize an in-memory SQLite Registry and a mocked Datastore.
    """

    # Fully-qualified name of the dataset-record storage manager class to
    # configure the registry with; subclasses override this to exercise the
    # UUID-based manager.
    datasetsManager = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager"
    # Name of the YAML file (under data/registry) whose datasets the tests
    # import; subclasses may point at a UUID-keyed variant.
    datasetsImportFile = "datasets.yaml"
    # Python type of the dataset IDs the configured manager produces.
    datasetsIdType = int

    def setUp(self):
        # Fresh scratch directory per test; removed again in tearDown.
        self.root = makeTestTempDir(TESTDIR)

58 

    def tearDown(self):
        # Remove the per-test scratch directory and everything inside it.
        removeTestTempDir(self.root)

61 

    def makeButler(self, **kwargs: Any) -> Butler:
        """Return new Butler instance on each call.

        Parameters
        ----------
        **kwargs : `typing.Any`
            Forwarded unchanged to the `Butler` constructor
            (e.g. ``writeable=True``).

        Returns
        -------
        butler : `Butler`
            Butler backed by a fresh SQLite registry under ``self.root`` with
            a mocked datastore applied (`DatastoreMock.apply`).
        """
        config = ButlerConfig()

        # make separate temporary directory for registry of this instance
        tmpdir = tempfile.mkdtemp(dir=self.root)
        config["registry", "db"] = f"sqlite:///{tmpdir}/gen3.sqlite3"
        config["registry", "managers", "datasets"] = self.datasetsManager
        config["root"] = self.root

        # have to make a registry first
        registryConfig = RegistryConfig(config.get("registry"))
        if self.datasetsIdType is int:
            # Creating a registry with the integer-ID manager emits a
            # FutureWarning; assert it so the warning doesn't leak into
            # test output and so we notice if it ever goes away.
            with self.assertWarns(FutureWarning):
                Registry.createFromConfig(registryConfig)
        else:
            Registry.createFromConfig(registryConfig)

        butler = Butler(config, **kwargs)
        # Replace real datastore I/O with a mock so tests need no files.
        DatastoreMock.apply(butler)
        return butler

83 

84 def comparableRef(self, ref: DatasetRef) -> DatasetRef: 

85 """Return a DatasetRef that can be compared to a DatasetRef from 

86 other repository. 

87 

88 For repositories that do not support round-trip of ID values this 

89 method returns unresolved DatasetRef, for round-trip-safe repos it 

90 returns unchanged ref. 

91 """ 

92 return ref if self.datasetsIdType is uuid.UUID else ref.unresolved() 

93 

    def testReadBackwardsCompatibility(self):
        """Test that we can read an export file written by a previous version
        and committed to the daf_butler git repo.

        Notes
        -----
        At present this export file includes only dimension data, not datasets,
        which greatly limits the usefulness of this test. We should address
        this at some point, but I think it's best to wait for the changes to
        the export format required for CALIBRATION collections to land.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Spot-check a few things, but the most important test is just that
        # the above does not raise.
        self.assertGreaterEqual(
            set(record.id for record in butler.registry.queryDimensionRecords("detector", instrument="HSC")),
            set(range(104)),  # should have all science CCDs; may have some focus ones.
        )
        self.assertGreaterEqual(
            {
                (record.id, record.physical_filter)
                for record in butler.registry.queryDimensionRecords("visit", instrument="HSC")
            },
            # Known (visit, filter) pairs from the committed export file.
            {
                (27136, "HSC-Z"),
                (11694, "HSC-G"),
                (23910, "HSC-R"),
                (11720, "HSC-Y"),
                (23900, "HSC-R"),
                (22646, "HSC-Y"),
                (1248, "HSC-I"),
                (19680, "HSC-I"),
                (1240, "HSC-I"),
                (424, "HSC-Y"),
                (19658, "HSC-I"),
                (344, "HSC-Y"),
                (1218, "HSC-R"),
                (1190, "HSC-Z"),
                (23718, "HSC-R"),
                (11700, "HSC-G"),
                (26036, "HSC-G"),
                (23872, "HSC-R"),
                (1170, "HSC-Z"),
                (1876, "HSC-Y"),
            },
        )

141 

    def testDatasetTransfers(self):
        """Test exporting all datasets from a repo and then importing them all
        back in again.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
            # Import it all again.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
        datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
        # IDs must have the type produced by the configured datasets manager.
        self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets1))
        self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets2))
        # Contents must round-trip, modulo ID resolution (see comparableRef).
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in datasets1],
            [self.comparableRef(ref) for ref in datasets2],
        )

165 

    def testComponentExport(self):
        """Test exporting component datasets and then importing them.

        This test intentionally does not depend on whether just the component
        is exported and then imported vs. the full composite dataset, because
        I don't want it to assume more than it needs to about the
        implementation.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                # "flat.psf" is a component of the "flat" composite dataset.
                exporter.saveDatasets(butler1.registry.queryDatasets("flat.psf", collections=...))
            # Import it all again.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        datasets1 = list(butler1.registry.queryDatasets("flat.psf", collections=...))
        datasets2 = list(butler2.registry.queryDatasets("flat.psf", collections=...))
        self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets1))
        self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets2))
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in datasets1],
            [self.comparableRef(ref) for ref in datasets2],
        )

193 

194 def testImportTwice(self): 

195 """Test exporting dimension records and datasets from a repo and then 

196 importing them all back in again twice. 

197 """ 

198 if self.datasetsIdType is not uuid.UUID: 

199 self.skipTest("This test can only work for UUIDs") 

200 # Import data to play with. 

201 butler1 = self.makeButler(writeable=True) 

202 butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

203 butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile)) 

204 with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as file: 

205 # Export all datasets. 

206 with butler1.export(filename=file.name) as exporter: 

207 exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...)) 

208 butler2 = self.makeButler(writeable=True) 

209 # Import it once. 

210 butler2.import_(filename=file.name) 

211 # Import it again 

212 butler2.import_(filename=file.name) 

213 datasets1 = list(butler1.registry.queryDatasets(..., collections=...)) 

214 datasets2 = list(butler2.registry.queryDatasets(..., collections=...)) 

215 self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets1)) 

216 self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets2)) 

217 self.assertCountEqual( 

218 [self.comparableRef(ref) for ref in datasets1], 

219 [self.comparableRef(ref) for ref in datasets2], 

220 ) 

221 

222 def testDatasetImportReuseIds(self): 

223 """Test for import that should preserve dataset IDs. 

224 

225 This test assumes that dataset IDs in datasets YAML are different from 

226 what auto-incremental insert would produce. 

227 """ 

228 if self.datasetsIdType is not int: 

229 self.skipTest("This test can only work for UUIDs") 

230 # Import data to play with. 

231 butler = self.makeButler(writeable=True) 

232 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

233 filename = os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile) 

234 butler.import_(filename=filename, reuseIds=True) 

235 datasets = list(butler.registry.queryDatasets(..., collections=...)) 

236 self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets)) 

237 # IDs are copied from YAML, list needs to be updated if file contents 

238 # is changed. 

239 self.assertCountEqual( 

240 [ref.id for ref in datasets], 

241 [1001, 1002, 1003, 1010, 1020, 1030, 2001, 2002, 2003, 2010, 2020, 2030, 2040], 

242 ) 

243 

244 # Try once again, it will raise 

245 with self.assertRaises(ConflictingDefinitionError): 

246 butler.import_(filename=filename, reuseIds=True) 

247 

    def testCollectionTransfers(self):
        """Test exporting and then importing collections of various types."""
        # Populate a registry with some datasets.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        registry1 = butler1.registry
        # Add some more collections.
        registry1.registerRun("run1")
        registry1.registerCollection("tag1", CollectionType.TAGGED)
        registry1.registerCollection("calibration1", CollectionType.CALIBRATION)
        registry1.registerCollection("chain1", CollectionType.CHAINED)
        registry1.registerCollection("chain2", CollectionType.CHAINED)
        # chain1 includes chain2, so chains are nested one level deep.
        registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
        registry1.setCollectionChain("chain2", ["calibration1", "run1"])
        # Associate some datasets into the TAGGED and CALIBRATION collections.
        flats1 = list(registry1.queryDatasets("flat", collections=...))
        registry1.associate("tag1", flats1)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias1a = registry1.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g")
        bias2a = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Certify with a mix of closed, open-ended, and empty timespans.
        registry1.certify("calibration1", [bias2a, bias3a], Timespan(t1, t2))
        registry1.certify("calibration1", [bias2b], Timespan(t2, None))
        registry1.certify("calibration1", [bias3b], Timespan(t2, t3))
        registry1.certify("calibration1", [bias1a], Timespan.makeEmpty())

        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all collections, and some datasets.
            with butler1.export(filename=file.name) as exporter:
                # Sort results to put chain1 before chain2, which is
                # intentionally not topological order.
                for collection in sorted(registry1.queryCollections()):
                    exporter.saveCollection(collection)
                exporter.saveDatasets(flats1)
                exporter.saveDatasets([bias1a, bias2a, bias2b, bias3a, bias3b])
            # Import them into a new registry.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        registry2 = butler2.registry
        # Check that it all round-tripped, starting with the collections
        # themselves.
        self.assertIs(registry2.getCollectionType("run1"), CollectionType.RUN)
        self.assertIs(registry2.getCollectionType("tag1"), CollectionType.TAGGED)
        self.assertIs(registry2.getCollectionType("calibration1"), CollectionType.CALIBRATION)
        self.assertIs(registry2.getCollectionType("chain1"), CollectionType.CHAINED)
        self.assertIs(registry2.getCollectionType("chain2"), CollectionType.CHAINED)
        self.assertEqual(
            list(registry2.getCollectionChain("chain1")),
            ["tag1", "run1", "chain2"],
        )
        self.assertEqual(
            list(registry2.getCollectionChain("chain2")),
            ["calibration1", "run1"],
        )
        # Check that tag collection contents are the same.
        self.maxDiff = None
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in registry1.queryDatasets(..., collections="tag1")],
            [self.comparableRef(ref) for ref in registry2.queryDatasets(..., collections="tag1")],
        )
        # Check that calibration collection contents are the same.
        self.assertCountEqual(
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry1.queryDatasetAssociations("bias", collections="calibration1")
            ],
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry2.queryDatasetAssociations("bias", collections="calibration1")
            ],
        )

324 

325 def testButlerGet(self): 

326 """Test that butler.get can work with different variants.""" 

327 

328 # Import data to play with. 

329 butler = self.makeButler(writeable=True) 

330 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

331 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile)) 

332 

333 # Find the DatasetRef for a flat 

334 coll = "imported_g" 

335 flat2g = butler.registry.findDataset( 

336 "flat", instrument="Cam1", detector=2, physical_filter="Cam1-G", collections=coll 

337 ) 

338 

339 # Create a numpy integer to check that works fine 

340 detector_np = np.int64(2) if np else 2 

341 print(type(detector_np)) 

342 

343 # Try to get it using different variations of dataId + keyword 

344 # arguments 

345 # Note that instrument.class_name does not work 

346 variants = ( 

347 (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}), 

348 (None, {"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}), 

349 ({"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}, {}), 

350 ({"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}, {}), 

351 ({"instrument": "Cam1", "detector": 2}, {"physical_filter": "Cam1-G"}), 

352 ({"detector.full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}), 

353 ({"full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}), 

354 (None, {"full_name": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}), 

355 (None, {"detector": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}), 

356 ({"name_in_raft": "b", "raft": "A"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}), 

357 ({"name_in_raft": "b"}, {"raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}), 

358 (None, {"name_in_raft": "b", "raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}), 

359 ( 

360 {"detector.name_in_raft": "b", "detector.raft": "A"}, 

361 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, 

362 ), 

363 ( 

364 { 

365 "detector.name_in_raft": "b", 

366 "detector.raft": "A", 

367 "instrument": "Cam1", 

368 "physical_filter": "Cam1-G", 

369 }, 

370 {}, 

371 ), 

372 # Duplicate (but valid) information. 

373 (None, {"instrument": "Cam1", "detector": 2, "raft": "A", "physical_filter": "Cam1-G"}), 

374 ({"detector": 2}, {"instrument": "Cam1", "raft": "A", "physical_filter": "Cam1-G"}), 

375 ({"raft": "A"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}), 

376 ({"raft": "A"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}), 

377 ) 

378 

379 for dataId, kwds in variants: 

380 try: 

381 flat_id, _ = butler.get("flat", dataId=dataId, collections=coll, **kwds) 

382 except Exception as e: 

383 raise type(e)(f"{str(e)}: dataId={dataId}, kwds={kwds}") from e 

384 self.assertEqual(flat_id, flat2g.id, msg=f"DataId: {dataId}, kwds: {kwds}") 

385 

386 # Check that bad combinations raise. 

387 variants = ( 

388 # Inconsistent detector information. 

389 (None, {"instrument": "Cam1", "detector": 2, "raft": "B", "physical_filter": "Cam1-G"}), 

390 ({"detector": 2}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}), 

391 ({"detector": 12}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}), 

392 ({"raft": "B"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}), 

393 ({"raft": "B"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}), 

394 # Under-specified. 

395 ({"raft": "B"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}), 

396 # Spurious kwargs. 

397 (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G", "x": "y"}), 

398 ({"x": "y"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}), 

399 ) 

400 for dataId, kwds in variants: 

401 with self.assertRaises(ValueError): 

402 butler.get("flat", dataId=dataId, collections=coll, **kwds) 

403 

    def testGetCalibration(self):
        """Test that `Butler.get` can be used to fetch from
        `~CollectionType.CALIBRATION` collections if the data ID includes
        extra dimensions with temporal information.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Certify some biases into a CALIBRATION collection.
        registry = butler.registry
        registry.registerCollection("calibs", CollectionType.CALIBRATION)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry.certify("calibs", [bias2a, bias3a], Timespan(t1, t2))
        registry.certify("calibs", [bias2b], Timespan(t2, None))
        registry.certify("calibs", [bias3b], Timespan(t2, t3))
        # Insert some exposure dimension data.  Exposure 3 falls in [t1, t2),
        # exposure 4 in [t2, t3), so each resolves a different certified bias.
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 3,
                "obs_id": "three",
                "timespan": Timespan(t1, t2),
                "physical_filter": "Cam1-G",
                "day_obs": 20201114,
                "seq_num": 55,
            },
            {
                "instrument": "Cam1",
                "id": 4,
                "obs_id": "four",
                "timespan": Timespan(t2, t3),
                "physical_filter": "Cam1-G",
                "day_obs": 20211114,
                "seq_num": 42,
            },
        )
        # Get some biases from raw-like data IDs.  With the mocked datastore,
        # get() yields a tuple whose first element is the dataset ID.
        bias2a_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 3, "detector": 2}, collections="calibs"
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 4, "detector": 3}, collections="calibs"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Get using the kwarg form
        bias3b_id, _ = butler.get("bias", instrument="Cam1", exposure=4, detector=3, collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Do it again but using the record information
        bias2a_id, _ = butler.get(
            "bias",
            {"instrument": "Cam1", "exposure.obs_id": "three", "detector.full_name": "Ab"},
            collections="calibs",
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias",
            {"exposure.obs_id": "four", "detector.full_name": "Ba"},
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary.
        bias3b_id, _ = butler.get(
            "bias", {"exposure": "four", "detector": "Ba"}, collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary and do it in the keyword arguments.
        bias3b_id, _ = butler.get(
            "bias", exposure="four", detector="Ba", collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Now with implied record columns
        bias3b_id, _ = butler.get(
            "bias",
            day_obs=20211114,
            seq_num=42,
            raft="B",
            name_in_raft="a",
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Allow a fully-specified dataId and unnecessary extra information
        # that comes from the record.
        bias3b_id, _ = butler.get(
            "bias",
            dataId=dict(
                exposure=4,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                instrument="Cam1",
            ),
            collections="calibs",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Extra but inconsistent record values are a problem.
        with self.assertRaises(ValueError):
            bias3b_id, _ = butler.get(
                "bias",
                exposure=3,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                collections="calibs",
                instrument="Cam1",
            )

        # Ensure that spurious kwargs cause an exception.
        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                {"exposure.obs_id": "four", "immediate": True, "detector.full_name": "Ba"},
                collections="calibs",
                instrument="Cam1",
            )

        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                day_obs=20211114,
                seq_num=42,
                raft="B",
                name_in_raft="a",
                collections="calibs",
                instrument="Cam1",
                immediate=True,
            )

550 

551 def testRegistryDefaults(self): 

552 """Test that we can default the collections and some data ID keys when 

553 constructing a butler. 

554 

555 Many tests that use default run already exist in ``test_butler.py``, so 

556 that isn't tested here. And while most of this functionality is 

557 implemented in `Registry`, we test it here instead of 

558 ``daf/butler/tests/registry.py`` because it shouldn't depend on the 

559 database backend at all. 

560 """ 

561 butler = self.makeButler(writeable=True) 

562 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

563 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile)) 

564 # Need to actually set defaults later, not at construction, because 

565 # we need to import the instrument before we can use it as a default. 

566 # Don't set a default instrument value for data IDs, because 'Cam1' 

567 # should be inferred by virtue of that being the only value in the 

568 # input collections. 

569 butler.registry.defaults = RegistryDefaults(collections=["imported_g"]) 

570 # Use findDataset without collections or instrument. 

571 ref = butler.registry.findDataset("flat", detector=2, physical_filter="Cam1-G") 

572 # Do the same with Butler.get; this should ultimately invoke a lot of 

573 # the same code, so it's a bit circular, but mostly we're checking that 

574 # it works at all. 

575 dataset_id, _ = butler.get("flat", detector=2, physical_filter="Cam1-G") 

576 self.assertEqual(ref.id, dataset_id) 

577 # Query for datasets. Test defaulting the data ID in both kwargs and 

578 # in the WHERE expression. 

579 queried_refs_1 = set(butler.registry.queryDatasets("flat", detector=2, physical_filter="Cam1-G")) 

580 self.assertEqual({ref}, queried_refs_1) 

581 queried_refs_2 = set( 

582 butler.registry.queryDatasets("flat", where="detector=2 AND physical_filter='Cam1-G'") 

583 ) 

584 self.assertEqual({ref}, queried_refs_2) 

585 # Query for data IDs with a dataset constraint. 

586 queried_data_ids = set( 

587 butler.registry.queryDataIds( 

588 {"instrument", "detector", "physical_filter"}, 

589 datasets={"flat"}, 

590 detector=2, 

591 physical_filter="Cam1-G", 

592 ) 

593 ) 

594 self.assertEqual({ref.dataId}, queried_data_ids) 

595 # Add another instrument to the repo, and a dataset that uses it to 

596 # the `imported_g` collection. 

597 butler.registry.insertDimensionData("instrument", {"name": "Cam2"}) 

598 camera = DatasetType( 

599 "camera", 

600 dimensions=butler.registry.dimensions["instrument"].graph, 

601 storageClass="Camera", 

602 ) 

603 butler.registry.registerDatasetType(camera) 

604 butler.registry.insertDatasets(camera, [{"instrument": "Cam2"}], run="imported_g") 

605 # Initialize a new butler with `imported_g` as its default run. 

606 # This should not have a default instrument, because there are two. 

607 # Pass run instead of collections; this should set both. 

608 butler2 = Butler(butler=butler, run="imported_g") 

609 self.assertEqual(list(butler2.registry.defaults.collections), ["imported_g"]) 

610 self.assertEqual(butler2.registry.defaults.run, "imported_g") 

611 self.assertFalse(butler2.registry.defaults.dataId) 

612 # Initialize a new butler with an instrument default explicitly given. 

613 # Set collections instead of run, which should then be None. 

614 butler3 = Butler(butler=butler, collections=["imported_g"], instrument="Cam2") 

615 self.assertEqual(list(butler3.registry.defaults.collections), ["imported_g"]) 

616 self.assertIsNone(butler3.registry.defaults.run, None) 

617 self.assertEqual(butler3.registry.defaults.dataId.byName(), {"instrument": "Cam2"}) 

618 

    def testJson(self):
        """Test JSON serialization mediated by registry."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.registry.findDataset("flat", detector=2, physical_filter="Cam1-G")

        # Transform the ref and dataset type to and from JSON
        # and check that it can be reconstructed properly

        # Do it with the ref and a component ref in minimal and standard form
        compRef = ref.makeComponentRef("wcs")

        for test_item in (ref, ref.datasetType, compRef, compRef.datasetType):
            for minimal in (False, True):
                json_str = test_item.to_json(minimal=minimal)
                from_json = type(test_item).from_json(json_str, registry=butler.registry)
                self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using registry")

                # for minimal=False case also do a test without registry,
                # reconstructing from the dimension universe alone
                if not minimal:
                    from_json = type(test_item).from_json(json_str, universe=butler.registry.dimensions)
                    self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using universe")

649 

    def testJsonDimensionRecordsAndHtmlRepresentation(self):
        """Test JSON round-tripping of dimension records, plus their HTML
        representation.
        """
        # Dimension Records
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        for dimension in ("detector", "visit"):
            records = butler.registry.queryDimensionRecords(dimension, instrument="HSC")
            for r in records:
                for minimal in (True, False):
                    json_str = r.to_json(minimal=minimal)
                    r_json = type(r).from_json(json_str, registry=butler.registry)
                    self.assertEqual(r_json, r)
                    # check with direct method
                    simple = r.to_simple()
                    fromDirect = type(simple).direct(**json.loads(json_str))
                    self.assertEqual(simple, fromDirect)
                    # Also check equality of each of the components as dicts
                    self.assertEqual(r_json.toDict(), r.toDict())

                # check the html representation of records
                r_html = r._repr_html_()
                self.assertTrue(isinstance(r_html, str))
                self.assertIn(dimension, r_html)

673 

674 def testWildcardQueries(self): 

675 """Test that different collection type queries work.""" 

676 

677 # Import data to play with. 

678 butler = self.makeButler(writeable=True) 

679 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

680 

681 # Create some collections 

682 created = {"collection", "u/user/test", "coll3"} 

683 for collection in created: 

684 butler.registry.registerCollection(collection, type=CollectionType.RUN) 

685 

686 collections = butler.registry.queryCollections() 

687 self.assertEqual(set(collections), created) 

688 

689 expressions = ( 

690 ("collection", {"collection"}), 

691 (..., created), 

692 ("*", created), 

693 (("collection", "*"), created), 

694 ("u/*", {"u/user/test"}), 

695 (re.compile("u.*"), {"u/user/test"}), 

696 (re.compile(".*oll.*"), {"collection", "coll3"}), 

697 ("*oll*", {"collection", "coll3"}), 

698 ((re.compile(r".*\d$"), "u/user/test"), {"coll3", "u/user/test"}), 

699 ("*[0-9]", {"coll3"}), 

700 ) 

701 for expression, expected in expressions: 

702 result = butler.registry.queryCollections(expression) 

703 self.assertEqual(set(result), expected) 

704 

705 

class SimpleButlerUUIDTestCase(SimpleButlerTestCase):
    """Same as SimpleButlerTestCase but uses UUID-based datasets manager and
    loads datasets from YAML file with UUIDs.
    """

    # UUID-producing manager instead of the integer-ID one in the base class.
    datasetsManager = (
        "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
    )
    # Import file whose dataset IDs are already UUIDs.
    datasetsImportFile = "datasets-uuid.yaml"
    datasetsIdType = uuid.UUID

716 

717 

class SimpleButlerMixedUUIDTestCase(SimpleButlerTestCase):
    """Same as SimpleButlerTestCase but uses UUID-based datasets manager and
    loads datasets from YAML file with integer IDs.
    """

    # UUID-producing manager fed by a file with integer IDs: exercises the
    # mixed case where imported integer IDs are mapped to UUIDs.
    datasetsManager = (
        "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
    )
    datasetsImportFile = "datasets.yaml"
    datasetsIdType = uuid.UUID

728 

729 

if __name__ == "__main__":
    # Allow running this test module directly with `python`.
    unittest.main()