Coverage for tests/test_simpleButler.py: 15%

286 statements

Report generated by coverage.py v6.4, created at 2022-05-24 02:27 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import json 

25import os 

26import re 

27import tempfile 

28import unittest 

29import uuid 

30from typing import Any 

31 

# numpy is optional for this test module: it is only used to check that
# numpy integer types are accepted as data ID values (see testButlerGet).
try:
    import numpy as np
except ImportError:
    # Fall back to plain Python ints when numpy is unavailable.
    np = None

36 

37import astropy.time 

38from lsst.daf.butler import Butler, ButlerConfig, CollectionType, DatasetRef, DatasetType, Registry, Timespan 

39from lsst.daf.butler.registry import ConflictingDefinitionError, RegistryConfig, RegistryDefaults 

40from lsst.daf.butler.tests import DatastoreMock 

41from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir 

42 

# Absolute path of the directory containing this test file; used to locate
# the YAML fixtures under ``data/registry``.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

44 

45 

class SimpleButlerTestCase(unittest.TestCase):
    """Tests for butler (including import/export functionality) that should not
    depend on the Registry Database backend or Datastore implementation, and
    can instead utilize an in-memory SQLite Registry and a mocked Datastore.
    """

    # Fully-qualified class name of the dataset-record storage manager under
    # test; subclasses override this to exercise the UUID-based manager.
    datasetsManager = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager"
    # YAML fixture (under data/registry) whose datasets the tests import.
    datasetsImportFile = "datasets.yaml"
    # Python type of the dataset IDs this manager produces (int or uuid.UUID).
    datasetsIdType = int

55 

    def setUp(self):
        # Fresh temporary root directory for each test's repositories;
        # removed again in tearDown.
        self.root = makeTestTempDir(TESTDIR)

58 

    def tearDown(self):
        # Remove everything created under the per-test root directory.
        removeTestTempDir(self.root)

61 

    def makeButler(self, **kwargs: Any) -> Butler:
        """Return new Butler instance on each call.

        Parameters
        ----------
        **kwargs : `~typing.Any`
            Forwarded unchanged to the `Butler` constructor
            (e.g. ``writeable=True``).

        Returns
        -------
        butler : `Butler`
            Butler backed by a fresh SQLite registry (using the class's
            ``datasetsManager``) and a mocked datastore.
        """
        config = ButlerConfig()

        # make separate temporary directory for registry of this instance
        tmpdir = tempfile.mkdtemp(dir=self.root)
        config["registry", "db"] = f"sqlite:///{tmpdir}/gen3.sqlite3"
        config["registry", "managers", "datasets"] = self.datasetsManager
        config["root"] = self.root

        # have to make a registry first
        registryConfig = RegistryConfig(config.get("registry"))
        Registry.createFromConfig(registryConfig)

        butler = Butler(config, **kwargs)
        # Replace real datastore operations with mocks so tests never rely
        # on actual file storage for datasets.
        DatastoreMock.apply(butler)
        return butler

79 

80 def comparableRef(self, ref: DatasetRef) -> DatasetRef: 

81 """Return a DatasetRef that can be compared to a DatasetRef from 

82 other repository. 

83 

84 For repositories that do not support round-trip of ID values this 

85 method returns unresolved DatasetRef, for round-trip-safe repos it 

86 returns unchanged ref. 

87 """ 

88 return ref if self.datasetsIdType is uuid.UUID else ref.unresolved() 

89 

    def testReadBackwardsCompatibility(self):
        """Test that we can read an export file written by a previous version
        and commit to the daf_butler git repo.

        Notes
        -----
        At present this export file includes only dimension data, not datasets,
        which greatly limits the usefulness of this test. We should address
        this at some point, but I think it's best to wait for the changes to
        the export format required for CALIBRATION collections to land.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Spot-check a few things, but the most important test is just that
        # the above does not raise.
        # assertGreaterEqual on sets checks the superset relation.
        self.assertGreaterEqual(
            set(record.id for record in butler.registry.queryDimensionRecords("detector", instrument="HSC")),
            set(range(104)),  # should have all science CCDs; may have some focus ones.
        )
        self.assertGreaterEqual(
            {
                (record.id, record.physical_filter)
                for record in butler.registry.queryDimensionRecords("visit", instrument="HSC")
            },
            # A hand-picked sample of (visit, filter) pairs expected in the
            # export file.
            {
                (27136, "HSC-Z"),
                (11694, "HSC-G"),
                (23910, "HSC-R"),
                (11720, "HSC-Y"),
                (23900, "HSC-R"),
                (22646, "HSC-Y"),
                (1248, "HSC-I"),
                (19680, "HSC-I"),
                (1240, "HSC-I"),
                (424, "HSC-Y"),
                (19658, "HSC-I"),
                (344, "HSC-Y"),
                (1218, "HSC-R"),
                (1190, "HSC-Z"),
                (23718, "HSC-R"),
                (11700, "HSC-G"),
                (26036, "HSC-G"),
                (23872, "HSC-R"),
                (1170, "HSC-Z"),
                (1876, "HSC-Y"),
            },
        )

137 

    def testDatasetTransfers(self):
        """Test exporting all datasets from a repo and then importing them all
        back in again.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
            # Import it all again.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
            datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
            datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
            # Both repos must yield IDs of the configured manager's type...
            self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets1))
            self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets2))
            # ...and hold the same datasets; comparableRef drops IDs when
            # they are not expected to round-trip.
            self.assertCountEqual(
                [self.comparableRef(ref) for ref in datasets1],
                [self.comparableRef(ref) for ref in datasets2],
            )

161 

    def testComponentExport(self):
        """Test exporting component datasets and then importing them.

        This test intentionally does not depend on whether just the component
        is exported and then imported vs. the full composite dataset, because
        I don't want it to assume more than it needs to about the
        implementation.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all datasets.  "flat.psf" names a component of the
            # "flat" dataset type.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets("flat.psf", collections=...))
            # Import it all again.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
            datasets1 = list(butler1.registry.queryDatasets("flat.psf", collections=...))
            datasets2 = list(butler2.registry.queryDatasets("flat.psf", collections=...))
            self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets1))
            self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets2))
            # Same component datasets on both sides (IDs dropped when they
            # do not round-trip; see comparableRef).
            self.assertCountEqual(
                [self.comparableRef(ref) for ref in datasets1],
                [self.comparableRef(ref) for ref in datasets2],
            )

189 

190 def testImportTwice(self): 

191 """Test exporting dimension records and datasets from a repo and then 

192 importing them all back in again twice. 

193 """ 

194 if self.datasetsIdType is not uuid.UUID: 

195 self.skipTest("This test can only work for UUIDs") 

196 # Import data to play with. 

197 butler1 = self.makeButler(writeable=True) 

198 butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

199 butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile)) 

200 with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as file: 

201 # Export all datasets. 

202 with butler1.export(filename=file.name) as exporter: 

203 exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...)) 

204 butler2 = self.makeButler(writeable=True) 

205 # Import it once. 

206 butler2.import_(filename=file.name) 

207 # Import it again 

208 butler2.import_(filename=file.name) 

209 datasets1 = list(butler1.registry.queryDatasets(..., collections=...)) 

210 datasets2 = list(butler2.registry.queryDatasets(..., collections=...)) 

211 self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets1)) 

212 self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets2)) 

213 self.assertCountEqual( 

214 [self.comparableRef(ref) for ref in datasets1], 

215 [self.comparableRef(ref) for ref in datasets2], 

216 ) 

217 

218 def testDatasetImportReuseIds(self): 

219 """Test for import that should preserve dataset IDs. 

220 

221 This test assumes that dataset IDs in datasets YAML are different from 

222 what auto-incremental insert would produce. 

223 """ 

224 if self.datasetsIdType is not int: 

225 self.skipTest("This test can only work for UUIDs") 

226 # Import data to play with. 

227 butler = self.makeButler(writeable=True) 

228 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

229 filename = os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile) 

230 butler.import_(filename=filename, reuseIds=True) 

231 datasets = list(butler.registry.queryDatasets(..., collections=...)) 

232 self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets)) 

233 # IDs are copied from YAML, list needs to be updated if file contents 

234 # is changed. 

235 self.assertCountEqual( 

236 [ref.id for ref in datasets], 

237 [1001, 1002, 1003, 1010, 1020, 1030, 2001, 2002, 2003, 2010, 2020, 2030, 2040], 

238 ) 

239 

240 # Try once again, it will raise 

241 with self.assertRaises(ConflictingDefinitionError): 

242 butler.import_(filename=filename, reuseIds=True) 

243 

    def testCollectionTransfers(self):
        """Test exporting and then importing collections of various types."""
        # Populate a registry with some datasets.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        registry1 = butler1.registry
        # Add some more collections.
        registry1.registerRun("run1")
        registry1.registerCollection("tag1", CollectionType.TAGGED)
        registry1.registerCollection("calibration1", CollectionType.CALIBRATION)
        registry1.registerCollection("chain1", CollectionType.CHAINED)
        registry1.registerCollection("chain2", CollectionType.CHAINED)
        # chain1 contains chain2, so the chains are nested.
        registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
        registry1.setCollectionChain("chain2", ["calibration1", "run1"])
        # Associate some datasets into the TAGGED and CALIBRATION collections.
        flats1 = list(registry1.queryDatasets("flat", collections=...))
        registry1.associate("tag1", flats1)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias1a = registry1.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g")
        bias2a = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Certify with a variety of timespans: bounded, open-ended, and
        # empty, so all forms are exercised by the round-trip below.
        registry1.certify("calibration1", [bias2a, bias3a], Timespan(t1, t2))
        registry1.certify("calibration1", [bias2b], Timespan(t2, None))
        registry1.certify("calibration1", [bias3b], Timespan(t2, t3))
        registry1.certify("calibration1", [bias1a], Timespan.makeEmpty())

        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all collections, and some datasets.
            with butler1.export(filename=file.name) as exporter:
                # Sort results to put chain1 before chain2, which is
                # intentionally not topological order.
                for collection in sorted(registry1.queryCollections()):
                    exporter.saveCollection(collection)
                exporter.saveDatasets(flats1)
                exporter.saveDatasets([bias1a, bias2a, bias2b, bias3a, bias3b])
            # Import them into a new registry.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
            registry2 = butler2.registry
            # Check that it all round-tripped, starting with the collections
            # themselves.
            self.assertIs(registry2.getCollectionType("run1"), CollectionType.RUN)
            self.assertIs(registry2.getCollectionType("tag1"), CollectionType.TAGGED)
            self.assertIs(registry2.getCollectionType("calibration1"), CollectionType.CALIBRATION)
            self.assertIs(registry2.getCollectionType("chain1"), CollectionType.CHAINED)
            self.assertIs(registry2.getCollectionType("chain2"), CollectionType.CHAINED)
            self.assertEqual(
                list(registry2.getCollectionChain("chain1")),
                ["tag1", "run1", "chain2"],
            )
            self.assertEqual(
                list(registry2.getCollectionChain("chain2")),
                ["calibration1", "run1"],
            )
            # Check that tag collection contents are the same.
            self.maxDiff = None  # show full diffs on failure
            self.assertCountEqual(
                [self.comparableRef(ref) for ref in registry1.queryDatasets(..., collections="tag1")],
                [self.comparableRef(ref) for ref in registry2.queryDatasets(..., collections="tag1")],
            )
            # Check that calibration collection contents are the same,
            # including the certification timespans.
            self.assertCountEqual(
                [
                    (self.comparableRef(assoc.ref), assoc.timespan)
                    for assoc in registry1.queryDatasetAssociations("bias", collections="calibration1")
                ],
                [
                    (self.comparableRef(assoc.ref), assoc.timespan)
                    for assoc in registry2.queryDatasetAssociations("bias", collections="calibration1")
                ],
            )

320 

321 def testButlerGet(self): 

322 """Test that butler.get can work with different variants.""" 

323 

324 # Import data to play with. 

325 butler = self.makeButler(writeable=True) 

326 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

327 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile)) 

328 

329 # Find the DatasetRef for a flat 

330 coll = "imported_g" 

331 flat2g = butler.registry.findDataset( 

332 "flat", instrument="Cam1", detector=2, physical_filter="Cam1-G", collections=coll 

333 ) 

334 

335 # Create a numpy integer to check that works fine 

336 detector_np = np.int64(2) if np else 2 

337 print(type(detector_np)) 

338 

339 # Try to get it using different variations of dataId + keyword 

340 # arguments 

341 # Note that instrument.class_name does not work 

342 variants = ( 

343 (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}), 

344 (None, {"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}), 

345 ({"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}, {}), 

346 ({"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}, {}), 

347 ({"instrument": "Cam1", "detector": 2}, {"physical_filter": "Cam1-G"}), 

348 ({"detector.full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}), 

349 ({"full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}), 

350 (None, {"full_name": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}), 

351 (None, {"detector": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}), 

352 ({"name_in_raft": "b", "raft": "A"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}), 

353 ({"name_in_raft": "b"}, {"raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}), 

354 (None, {"name_in_raft": "b", "raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}), 

355 ( 

356 {"detector.name_in_raft": "b", "detector.raft": "A"}, 

357 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, 

358 ), 

359 ( 

360 { 

361 "detector.name_in_raft": "b", 

362 "detector.raft": "A", 

363 "instrument": "Cam1", 

364 "physical_filter": "Cam1-G", 

365 }, 

366 {}, 

367 ), 

368 # Duplicate (but valid) information. 

369 (None, {"instrument": "Cam1", "detector": 2, "raft": "A", "physical_filter": "Cam1-G"}), 

370 ({"detector": 2}, {"instrument": "Cam1", "raft": "A", "physical_filter": "Cam1-G"}), 

371 ({"raft": "A"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}), 

372 ({"raft": "A"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}), 

373 ) 

374 

375 for dataId, kwds in variants: 

376 try: 

377 flat_id, _ = butler.get("flat", dataId=dataId, collections=coll, **kwds) 

378 except Exception as e: 

379 raise type(e)(f"{str(e)}: dataId={dataId}, kwds={kwds}") from e 

380 self.assertEqual(flat_id, flat2g.id, msg=f"DataId: {dataId}, kwds: {kwds}") 

381 

382 # Check that bad combinations raise. 

383 variants = ( 

384 # Inconsistent detector information. 

385 (None, {"instrument": "Cam1", "detector": 2, "raft": "B", "physical_filter": "Cam1-G"}), 

386 ({"detector": 2}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}), 

387 ({"detector": 12}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}), 

388 ({"raft": "B"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}), 

389 ({"raft": "B"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}), 

390 # Under-specified. 

391 ({"raft": "B"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}), 

392 # Spurious kwargs. 

393 (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G", "x": "y"}), 

394 ({"x": "y"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}), 

395 ) 

396 for dataId, kwds in variants: 

397 with self.assertRaises(ValueError): 

398 butler.get("flat", dataId=dataId, collections=coll, **kwds) 

399 

    def testGetCalibration(self):
        """Test that `Butler.get` can be used to fetch from
        `~CollectionType.CALIBRATION` collections if the data ID includes
        extra dimensions with temporal information.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Certify some biases into a CALIBRATION collection.
        registry = butler.registry
        registry.registerCollection("calibs", CollectionType.CALIBRATION)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry.certify("calibs", [bias2a, bias3a], Timespan(t1, t2))
        registry.certify("calibs", [bias2b], Timespan(t2, None))
        registry.certify("calibs", [bias3b], Timespan(t2, t3))
        # Insert some exposure dimension data: exposure 3 overlaps the
        # first certification window, exposure 4 the second.
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 3,
                "obs_id": "three",
                "timespan": Timespan(t1, t2),
                "physical_filter": "Cam1-G",
                "day_obs": 20201114,
                "seq_num": 55,
            },
            {
                "instrument": "Cam1",
                "id": 4,
                "obs_id": "four",
                "timespan": Timespan(t2, t3),
                "physical_filter": "Cam1-G",
                "day_obs": 20211114,
                "seq_num": 42,
            },
        )
        # Get some biases from raw-like data IDs.
        bias2a_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 3, "detector": 2}, collections="calibs"
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 4, "detector": 3}, collections="calibs"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Get using the kwarg form
        bias3b_id, _ = butler.get("bias", instrument="Cam1", exposure=4, detector=3, collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Do it again but using the record information
        bias2a_id, _ = butler.get(
            "bias",
            {"instrument": "Cam1", "exposure.obs_id": "three", "detector.full_name": "Ab"},
            collections="calibs",
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias",
            {"exposure.obs_id": "four", "detector.full_name": "Ba"},
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary.
        bias3b_id, _ = butler.get(
            "bias", {"exposure": "four", "detector": "Ba"}, collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary and do it in the keyword arguments.
        bias3b_id, _ = butler.get(
            "bias", exposure="four", detector="Ba", collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Now with implied record columns
        bias3b_id, _ = butler.get(
            "bias",
            day_obs=20211114,
            seq_num=42,
            raft="B",
            name_in_raft="a",
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Allow a fully-specified dataId and unnecessary extra information
        # that comes from the record.
        bias3b_id, _ = butler.get(
            "bias",
            dataId=dict(
                exposure=4,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                instrument="Cam1",
            ),
            collections="calibs",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Extra but inconsistent record values are a problem: exposure 3
        # has different day_obs/seq_num.
        with self.assertRaises(ValueError):
            bias3b_id, _ = butler.get(
                "bias",
                exposure=3,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                collections="calibs",
                instrument="Cam1",
            )

        # Ensure that spurious kwargs cause an exception.
        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                {"exposure.obs_id": "four", "immediate": True, "detector.full_name": "Ba"},
                collections="calibs",
                instrument="Cam1",
            )

        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                day_obs=20211114,
                seq_num=42,
                raft="B",
                name_in_raft="a",
                collections="calibs",
                instrument="Cam1",
                immediate=True,
            )

546 

547 def testRegistryDefaults(self): 

548 """Test that we can default the collections and some data ID keys when 

549 constructing a butler. 

550 

551 Many tests that use default run already exist in ``test_butler.py``, so 

552 that isn't tested here. And while most of this functionality is 

553 implemented in `Registry`, we test it here instead of 

554 ``daf/butler/tests/registry.py`` because it shouldn't depend on the 

555 database backend at all. 

556 """ 

557 butler = self.makeButler(writeable=True) 

558 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

559 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile)) 

560 # Need to actually set defaults later, not at construction, because 

561 # we need to import the instrument before we can use it as a default. 

562 # Don't set a default instrument value for data IDs, because 'Cam1' 

563 # should be inferred by virtue of that being the only value in the 

564 # input collections. 

565 butler.registry.defaults = RegistryDefaults(collections=["imported_g"]) 

566 # Use findDataset without collections or instrument. 

567 ref = butler.registry.findDataset("flat", detector=2, physical_filter="Cam1-G") 

568 # Do the same with Butler.get; this should ultimately invoke a lot of 

569 # the same code, so it's a bit circular, but mostly we're checking that 

570 # it works at all. 

571 dataset_id, _ = butler.get("flat", detector=2, physical_filter="Cam1-G") 

572 self.assertEqual(ref.id, dataset_id) 

573 # Query for datasets. Test defaulting the data ID in both kwargs and 

574 # in the WHERE expression. 

575 queried_refs_1 = set(butler.registry.queryDatasets("flat", detector=2, physical_filter="Cam1-G")) 

576 self.assertEqual({ref}, queried_refs_1) 

577 queried_refs_2 = set( 

578 butler.registry.queryDatasets("flat", where="detector=2 AND physical_filter='Cam1-G'") 

579 ) 

580 self.assertEqual({ref}, queried_refs_2) 

581 # Query for data IDs with a dataset constraint. 

582 queried_data_ids = set( 

583 butler.registry.queryDataIds( 

584 {"instrument", "detector", "physical_filter"}, 

585 datasets={"flat"}, 

586 detector=2, 

587 physical_filter="Cam1-G", 

588 ) 

589 ) 

590 self.assertEqual({ref.dataId}, queried_data_ids) 

591 # Add another instrument to the repo, and a dataset that uses it to 

592 # the `imported_g` collection. 

593 butler.registry.insertDimensionData("instrument", {"name": "Cam2"}) 

594 camera = DatasetType( 

595 "camera", 

596 dimensions=butler.registry.dimensions["instrument"].graph, 

597 storageClass="Camera", 

598 ) 

599 butler.registry.registerDatasetType(camera) 

600 butler.registry.insertDatasets(camera, [{"instrument": "Cam2"}], run="imported_g") 

601 # Initialize a new butler with `imported_g` as its default run. 

602 # This should not have a default instrument, because there are two. 

603 # Pass run instead of collections; this should set both. 

604 butler2 = Butler(butler=butler, run="imported_g") 

605 self.assertEqual(list(butler2.registry.defaults.collections), ["imported_g"]) 

606 self.assertEqual(butler2.registry.defaults.run, "imported_g") 

607 self.assertFalse(butler2.registry.defaults.dataId) 

608 # Initialize a new butler with an instrument default explicitly given. 

609 # Set collections instead of run, which should then be None. 

610 butler3 = Butler(butler=butler, collections=["imported_g"], instrument="Cam2") 

611 self.assertEqual(list(butler3.registry.defaults.collections), ["imported_g"]) 

612 self.assertIsNone(butler3.registry.defaults.run, None) 

613 self.assertEqual(butler3.registry.defaults.dataId.byName(), {"instrument": "Cam2"}) 

614 

    def testJson(self):
        """Test JSON serialization mediated by registry."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.registry.findDataset("flat", detector=2, physical_filter="Cam1-G")

        # Transform the ref and dataset type to and from JSON
        # and check that it can be reconstructed properly

        # Do it with the ref and a component ref in minimal and standard form
        compRef = ref.makeComponentRef("wcs")

        for test_item in (ref, ref.datasetType, compRef, compRef.datasetType):
            for minimal in (False, True):
                json_str = test_item.to_json(minimal=minimal)
                from_json = type(test_item).from_json(json_str, registry=butler.registry)
                self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using registry")

                # for minimal=False case also do a test without registry;
                # the non-minimal form carries enough information to
                # reconstruct from the dimension universe alone.
                if not minimal:
                    from_json = type(test_item).from_json(json_str, universe=butler.registry.dimensions)
                    self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using universe")

645 

    def testJsonDimensionRecordsAndHtmlRepresentation(self):
        """Test JSON round-tripping of dimension records and their HTML
        representation.
        """
        # Dimension Records
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        for dimension in ("detector", "visit"):
            records = butler.registry.queryDimensionRecords(dimension, instrument="HSC")
            for r in records:
                for minimal in (True, False):
                    json_str = r.to_json(minimal=minimal)
                    r_json = type(r).from_json(json_str, registry=butler.registry)
                    self.assertEqual(r_json, r)
                    # check with direct method
                    simple = r.to_simple()
                    fromDirect = type(simple).direct(**json.loads(json_str))
                    self.assertEqual(simple, fromDirect)
                    # Also check equality of each of the components as dicts
                    self.assertEqual(r_json.toDict(), r.toDict())

                # check the html representation of records
                r_html = r._repr_html_()
                self.assertTrue(isinstance(r_html, str))
                self.assertIn(dimension, r_html)

669 

670 def testWildcardQueries(self): 

671 """Test that different collection type queries work.""" 

672 

673 # Import data to play with. 

674 butler = self.makeButler(writeable=True) 

675 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

676 

677 # Create some collections 

678 created = {"collection", "u/user/test", "coll3"} 

679 for collection in created: 

680 butler.registry.registerCollection(collection, type=CollectionType.RUN) 

681 

682 collections = butler.registry.queryCollections() 

683 self.assertEqual(set(collections), created) 

684 

685 expressions = ( 

686 ("collection", {"collection"}), 

687 (..., created), 

688 ("*", created), 

689 (("collection", "*"), created), 

690 ("u/*", {"u/user/test"}), 

691 (re.compile("u.*"), {"u/user/test"}), 

692 (re.compile(".*oll.*"), {"collection", "coll3"}), 

693 ("*oll*", {"collection", "coll3"}), 

694 ((re.compile(r".*\d$"), "u/user/test"), {"coll3", "u/user/test"}), 

695 ("*[0-9]", {"coll3"}), 

696 ) 

697 for expression, expected in expressions: 

698 result = butler.registry.queryCollections(expression) 

699 self.assertEqual(set(result), expected) 

700 

701 

class SimpleButlerUUIDTestCase(SimpleButlerTestCase):
    """Same as SimpleButlerTestCase but uses UUID-based datasets manager and
    loads datasets from YAML file with UUIDs.
    """

    # UUID-based manager; dataset IDs round-trip through export/import.
    datasetsManager = (
        "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
    )
    datasetsImportFile = "datasets-uuid.yaml"
    datasetsIdType = uuid.UUID

712 

713 

class SimpleButlerMixedUUIDTestCase(SimpleButlerTestCase):
    """Same as SimpleButlerTestCase but uses UUID-based datasets manager and
    loads datasets from YAML file with integer IDs.
    """

    # UUID-based manager fed by the integer-ID fixture: imported datasets
    # get fresh UUIDs, hence "mixed".
    datasetsManager = (
        "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
    )
    datasetsImportFile = "datasets.yaml"
    datasetsIdType = uuid.UUID

724 

725 

# Run the tests when this file is executed directly.
if __name__ == "__main__":
    unittest.main()