Coverage for tests/test_simpleButler.py: 15%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

287 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import json 

25import os 

26import re 

27import tempfile 

28import unittest 

29import uuid 

30from typing import Any 

31 

32try: 

33 import numpy as np 

34except ImportError: 

35 np = None 

36 

37import astropy.time 

38from lsst.daf.butler import Butler, ButlerConfig, CollectionType, DatasetRef, DatasetType, Registry, Timespan 

39from lsst.daf.butler.registry import ConflictingDefinitionError, DataIdError, RegistryConfig, RegistryDefaults 

40from lsst.daf.butler.tests import DatastoreMock 

41from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir 

42 

43TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

44 

45 

class SimpleButlerTestCase(unittest.TestCase):
    """Tests for butler (including import/export functionality) that should not
    depend on the Registry Database backend or Datastore implementation, and
    can instead utilize an in-memory SQLite Registry and a mocked Datastore.
    """

    # Fully-qualified class name of the datasets manager under test;
    # subclasses override this to exercise the UUID-based manager.
    datasetsManager = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager"
    # YAML export file (under data/registry) used to populate test repos.
    datasetsImportFile = "datasets.yaml"
    # Python type of the dataset IDs produced by the manager above.
    datasetsIdType = int

    def setUp(self) -> None:
        # Fresh temporary directory per test; each butler instance gets its
        # own SQLite registry beneath it.
        self.root = makeTestTempDir(TESTDIR)

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def makeButler(self, **kwargs: Any) -> Butler:
        """Return new Butler instance on each call.

        Parameters
        ----------
        **kwargs
            Forwarded to the `Butler` constructor (e.g. ``writeable=True``).
        """
        config = ButlerConfig()

        # make separate temporary directory for registry of this instance
        tmpdir = tempfile.mkdtemp(dir=self.root)
        config["registry", "db"] = f"sqlite:///{tmpdir}/gen3.sqlite3"
        config["registry", "managers", "datasets"] = self.datasetsManager
        config["root"] = self.root

        # have to make a registry first
        registryConfig = RegistryConfig(config.get("registry"))
        Registry.createFromConfig(registryConfig)

        butler = Butler(config, **kwargs)
        # Replace datastore methods with mocks so no real files are written.
        DatastoreMock.apply(butler)
        return butler

    def comparableRef(self, ref: DatasetRef) -> DatasetRef:
        """Return a DatasetRef that can be compared to a DatasetRef from
        other repository.

        For repositories that do not support round-trip of ID values this
        method returns unresolved DatasetRef, for round-trip-safe repos it
        returns unchanged ref.
        """
        return ref if self.datasetsIdType is uuid.UUID else ref.unresolved()

    def testReadBackwardsCompatibility(self):
        """Test that we can read an export file written by a previous version
        and commit to the daf_butler git repo.

        Notes
        -----
        At present this export file includes only dimension data, not datasets,
        which greatly limits the usefulness of this test.  We should address
        this at some point, but I think it's best to wait for the changes to
        the export format required for CALIBRATION collections to land.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Spot-check a few things, but the most important test is just that
        # the above does not raise.
        self.assertGreaterEqual(
            {record.id for record in butler.registry.queryDimensionRecords("detector", instrument="HSC")},
            set(range(104)),  # should have all science CCDs; may have some focus ones.
        )
        self.assertGreaterEqual(
            {
                (record.id, record.physical_filter)
                for record in butler.registry.queryDimensionRecords("visit", instrument="HSC")
            },
            {
                (27136, "HSC-Z"),
                (11694, "HSC-G"),
                (23910, "HSC-R"),
                (11720, "HSC-Y"),
                (23900, "HSC-R"),
                (22646, "HSC-Y"),
                (1248, "HSC-I"),
                (19680, "HSC-I"),
                (1240, "HSC-I"),
                (424, "HSC-Y"),
                (19658, "HSC-I"),
                (344, "HSC-Y"),
                (1218, "HSC-R"),
                (1190, "HSC-Z"),
                (23718, "HSC-R"),
                (11700, "HSC-G"),
                (26036, "HSC-G"),
                (23872, "HSC-R"),
                (1170, "HSC-Z"),
                (1876, "HSC-Y"),
            },
        )

    def testDatasetTransfers(self):
        """Test exporting all datasets from a repo and then importing them all
        back in again.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
            # Import it all again.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
        datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
        self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets1))
        self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets2))
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in datasets1],
            [self.comparableRef(ref) for ref in datasets2],
        )

    def testComponentExport(self):
        """Test exporting component datasets and then importing them.

        This test intentionally does not depend on whether just the component
        is exported and then imported vs. the full composite dataset, because
        I don't want it to assume more than it needs to about the
        implementation.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets("flat.psf", collections=...))
            # Import it all again.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        datasets1 = list(butler1.registry.queryDatasets("flat.psf", collections=...))
        datasets2 = list(butler2.registry.queryDatasets("flat.psf", collections=...))
        self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets1))
        self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets2))
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in datasets1],
            [self.comparableRef(ref) for ref in datasets2],
        )

    def testDatasetImportTwice(self):
        """Test exporting all datasets from a repo and then importing them all
        back in again twice.
        """
        if self.datasetsIdType is not uuid.UUID:
            self.skipTest("This test can only work for UUIDs")
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # delete=False because the file is reopened by import_ below; cleanup
        # is handled by tearDown removing self.root's parent test directory.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
            butler2 = self.makeButler(writeable=True)
            # Import it once.
            butler2.import_(filename=file.name)
            # Import it again, but ignore all dimensions
            dimensions = {
                dimension.name for dimension in butler2.registry.dimensions.getStaticDimensions()
            }
            butler2.import_(filename=file.name, skip_dimensions=dimensions)
        datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
        datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
        self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets1))
        self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets2))
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in datasets1],
            [self.comparableRef(ref) for ref in datasets2],
        )

    def testDatasetImportReuseIds(self):
        """Test for import that should preserve dataset IDs.

        This test assumes that dataset IDs in datasets YAML are different from
        what auto-incremental insert would produce.
        """
        if self.datasetsIdType is not int:
            # Fixed: the message previously said "UUIDs", but this test is
            # specifically for integer (auto-increment) dataset IDs.
            self.skipTest("This test can only work for integer dataset IDs")
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        filename = os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile)
        butler.import_(filename=filename, reuseIds=True)
        datasets = list(butler.registry.queryDatasets(..., collections=...))
        self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets))
        # IDs are copied from YAML, list needs to be updated if file contents
        # is changed.
        self.assertCountEqual(
            [ref.id for ref in datasets],
            [1001, 1002, 1003, 1010, 1020, 1030, 2001, 2002, 2003, 2010, 2020, 2030, 2040],
        )

        # Try once again, it will raise
        with self.assertRaises(ConflictingDefinitionError):
            butler.import_(filename=filename, reuseIds=True)

    def testCollectionTransfers(self):
        """Test exporting and then importing collections of various types."""
        # Populate a registry with some datasets.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        registry1 = butler1.registry
        # Add some more collections.
        registry1.registerRun("run1")
        registry1.registerCollection("tag1", CollectionType.TAGGED)
        registry1.registerCollection("calibration1", CollectionType.CALIBRATION)
        registry1.registerCollection("chain1", CollectionType.CHAINED)
        registry1.registerCollection("chain2", CollectionType.CHAINED)
        registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
        registry1.setCollectionChain("chain2", ["calibration1", "run1"])
        # Associate some datasets into the TAGGED and CALIBRATION collections.
        flats1 = list(registry1.queryDatasets("flat", collections=...))
        registry1.associate("tag1", flats1)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias1a = registry1.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g")
        bias2a = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry1.certify("calibration1", [bias2a, bias3a], Timespan(t1, t2))
        registry1.certify("calibration1", [bias2b], Timespan(t2, None))
        registry1.certify("calibration1", [bias3b], Timespan(t2, t3))
        registry1.certify("calibration1", [bias1a], Timespan.makeEmpty())

        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all collections, and some datasets.
            with butler1.export(filename=file.name) as exporter:
                # Sort results to put chain1 before chain2, which is
                # intentionally not topological order.
                for collection in sorted(registry1.queryCollections()):
                    exporter.saveCollection(collection)
                exporter.saveDatasets(flats1)
                exporter.saveDatasets([bias1a, bias2a, bias2b, bias3a, bias3b])
            # Import them into a new registry.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        registry2 = butler2.registry
        # Check that it all round-tripped, starting with the collections
        # themselves.
        self.assertIs(registry2.getCollectionType("run1"), CollectionType.RUN)
        self.assertIs(registry2.getCollectionType("tag1"), CollectionType.TAGGED)
        self.assertIs(registry2.getCollectionType("calibration1"), CollectionType.CALIBRATION)
        self.assertIs(registry2.getCollectionType("chain1"), CollectionType.CHAINED)
        self.assertIs(registry2.getCollectionType("chain2"), CollectionType.CHAINED)
        self.assertEqual(
            list(registry2.getCollectionChain("chain1")),
            ["tag1", "run1", "chain2"],
        )
        self.assertEqual(
            list(registry2.getCollectionChain("chain2")),
            ["calibration1", "run1"],
        )
        # Check that tag collection contents are the same.
        self.maxDiff = None
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in registry1.queryDatasets(..., collections="tag1")],
            [self.comparableRef(ref) for ref in registry2.queryDatasets(..., collections="tag1")],
        )
        # Check that calibration collection contents are the same.
        self.assertCountEqual(
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry1.queryDatasetAssociations("bias", collections="calibration1")
            ],
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry2.queryDatasetAssociations("bias", collections="calibration1")
            ],
        )

    def testButlerGet(self):
        """Test that butler.get can work with different variants."""

        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))

        # Find the DatasetRef for a flat
        coll = "imported_g"
        flat2g = butler.registry.findDataset(
            "flat", instrument="Cam1", detector=2, physical_filter="Cam1-G", collections=coll
        )

        # Create a numpy integer to check that works fine
        # (fall back to a plain int when numpy is not available).
        detector_np = np.int64(2) if np is not None else 2

        # Try to get it using different variations of dataId + keyword
        # arguments
        # Note that instrument.class_name does not work
        variants = (
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            (None, {"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}),
            ({"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": 2}, {"physical_filter": "Cam1-G"}),
            ({"detector.full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"full_name": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"detector": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b", "raft": "A"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b"}, {"raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"name_in_raft": "b", "raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (
                {"detector.name_in_raft": "b", "detector.raft": "A"},
                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
            ),
            (
                {
                    "detector.name_in_raft": "b",
                    "detector.raft": "A",
                    "instrument": "Cam1",
                    "physical_filter": "Cam1-G",
                },
                {},
            ),
            # Duplicate (but valid) information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "A", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "A", "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
        )

        for dataId, kwds in variants:
            try:
                flat_id, _ = butler.get("flat", dataId=dataId, collections=coll, **kwds)
            except Exception as e:
                # Re-raise with the failing variant appended so the test
                # failure identifies which combination broke.
                raise type(e)(f"{str(e)}: dataId={dataId}, kwds={kwds}") from e
            self.assertEqual(flat_id, flat2g.id, msg=f"DataId: {dataId}, kwds: {kwds}")

        # Check that bad combinations raise.
        variants = (
            # Inconsistent detector information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "B", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
            ({"detector": 12}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
            ({"raft": "B"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "B"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
            # Under-specified.
            ({"raft": "B"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            # Spurious kwargs.
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G", "x": "y"}),
            ({"x": "y"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
        )
        for dataId, kwds in variants:
            with self.assertRaises(DataIdError):
                butler.get("flat", dataId=dataId, collections=coll, **kwds)

    def testGetCalibration(self):
        """Test that `Butler.get` can be used to fetch from
        `~CollectionType.CALIBRATION` collections if the data ID includes
        extra dimensions with temporal information.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Certify some biases into a CALIBRATION collection.
        registry = butler.registry
        registry.registerCollection("calibs", CollectionType.CALIBRATION)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry.certify("calibs", [bias2a, bias3a], Timespan(t1, t2))
        registry.certify("calibs", [bias2b], Timespan(t2, None))
        registry.certify("calibs", [bias3b], Timespan(t2, t3))
        # Insert some exposure dimension data.
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 3,
                "obs_id": "three",
                "timespan": Timespan(t1, t2),
                "physical_filter": "Cam1-G",
                "day_obs": 20201114,
                "seq_num": 55,
            },
            {
                "instrument": "Cam1",
                "id": 4,
                "obs_id": "four",
                "timespan": Timespan(t2, t3),
                "physical_filter": "Cam1-G",
                "day_obs": 20211114,
                "seq_num": 42,
            },
        )
        # Get some biases from raw-like data IDs.
        bias2a_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 3, "detector": 2}, collections="calibs"
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 4, "detector": 3}, collections="calibs"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Get using the kwarg form
        bias3b_id, _ = butler.get("bias", instrument="Cam1", exposure=4, detector=3, collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Do it again but using the record information
        bias2a_id, _ = butler.get(
            "bias",
            {"instrument": "Cam1", "exposure.obs_id": "three", "detector.full_name": "Ab"},
            collections="calibs",
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias",
            {"exposure.obs_id": "four", "detector.full_name": "Ba"},
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary.
        bias3b_id, _ = butler.get(
            "bias", {"exposure": "four", "detector": "Ba"}, collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary and do it in the keyword arguments.
        bias3b_id, _ = butler.get(
            "bias", exposure="four", detector="Ba", collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Now with implied record columns
        bias3b_id, _ = butler.get(
            "bias",
            day_obs=20211114,
            seq_num=42,
            raft="B",
            name_in_raft="a",
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Allow a fully-specified dataId and unnecessary extra information
        # that comes from the record.
        bias3b_id, _ = butler.get(
            "bias",
            dataId=dict(
                exposure=4,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                instrument="Cam1",
            ),
            collections="calibs",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Extra but inconsistent record values are a problem.
        with self.assertRaises(DataIdError):
            bias3b_id, _ = butler.get(
                "bias",
                exposure=3,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                collections="calibs",
                instrument="Cam1",
            )

        # Ensure that spurious kwargs cause an exception.
        with self.assertRaises(DataIdError):
            butler.get(
                "bias",
                {"exposure.obs_id": "four", "immediate": True, "detector.full_name": "Ba"},
                collections="calibs",
                instrument="Cam1",
            )

        with self.assertRaises(DataIdError):
            butler.get(
                "bias",
                day_obs=20211114,
                seq_num=42,
                raft="B",
                name_in_raft="a",
                collections="calibs",
                instrument="Cam1",
                immediate=True,
            )

    def testRegistryDefaults(self):
        """Test that we can default the collections and some data ID keys when
        constructing a butler.

        Many tests that use default run already exist in ``test_butler.py``, so
        that isn't tested here.  And while most of this functionality is
        implemented in `Registry`, we test it here instead of
        ``daf/butler/tests/registry.py`` because it shouldn't depend on the
        database backend at all.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.registry.findDataset("flat", detector=2, physical_filter="Cam1-G")
        # Do the same with Butler.get; this should ultimately invoke a lot of
        # the same code, so it's a bit circular, but mostly we're checking that
        # it works at all.
        dataset_id, _ = butler.get("flat", detector=2, physical_filter="Cam1-G")
        self.assertEqual(ref.id, dataset_id)
        # Query for datasets.  Test defaulting the data ID in both kwargs and
        # in the WHERE expression.
        queried_refs_1 = set(butler.registry.queryDatasets("flat", detector=2, physical_filter="Cam1-G"))
        self.assertEqual({ref}, queried_refs_1)
        queried_refs_2 = set(
            butler.registry.queryDatasets("flat", where="detector=2 AND physical_filter='Cam1-G'")
        )
        self.assertEqual({ref}, queried_refs_2)
        # Query for data IDs with a dataset constraint.
        queried_data_ids = set(
            butler.registry.queryDataIds(
                {"instrument", "detector", "physical_filter"},
                datasets={"flat"},
                detector=2,
                physical_filter="Cam1-G",
            )
        )
        self.assertEqual({ref.dataId}, queried_data_ids)
        # Add another instrument to the repo, and a dataset that uses it to
        # the `imported_g` collection.
        butler.registry.insertDimensionData("instrument", {"name": "Cam2"})
        camera = DatasetType(
            "camera",
            dimensions=butler.registry.dimensions["instrument"].graph,
            storageClass="Camera",
        )
        butler.registry.registerDatasetType(camera)
        butler.registry.insertDatasets(camera, [{"instrument": "Cam2"}], run="imported_g")
        # Initialize a new butler with `imported_g` as its default run.
        # This should not have a default instrument, because there are two.
        # Pass run instead of collections; this should set both.
        butler2 = Butler(butler=butler, run="imported_g")
        self.assertEqual(list(butler2.registry.defaults.collections), ["imported_g"])
        self.assertEqual(butler2.registry.defaults.run, "imported_g")
        self.assertFalse(butler2.registry.defaults.dataId)
        # Initialize a new butler with an instrument default explicitly given.
        # Set collections instead of run, which should then be None.
        butler3 = Butler(butler=butler, collections=["imported_g"], instrument="Cam2")
        self.assertEqual(list(butler3.registry.defaults.collections), ["imported_g"])
        # Fixed: the second positional argument of assertIsNone is ``msg``;
        # the original passed a spurious ``None`` message here.
        self.assertIsNone(butler3.registry.defaults.run)
        self.assertEqual(butler3.registry.defaults.dataId.byName(), {"instrument": "Cam2"})

    def testJson(self):
        """Test JSON serialization mediated by registry."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.registry.findDataset("flat", detector=2, physical_filter="Cam1-G")

        # Transform the ref and dataset type to and from JSON
        # and check that it can be reconstructed properly

        # Do it with the ref and a component ref in minimal and standard form
        compRef = ref.makeComponentRef("wcs")

        for test_item in (ref, ref.datasetType, compRef, compRef.datasetType):
            for minimal in (False, True):
                json_str = test_item.to_json(minimal=minimal)
                from_json = type(test_item).from_json(json_str, registry=butler.registry)
                self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using registry")

                # for minimal=False case also do a test without registry
                if not minimal:
                    from_json = type(test_item).from_json(json_str, universe=butler.registry.dimensions)
                    self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using universe")

    def testJsonDimensionRecordsAndHtmlRepresentation(self):
        """Test JSON round-tripping of dimension records and their HTML
        rendering for notebooks.
        """
        # Dimension Records
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        for dimension in ("detector", "visit"):
            records = butler.registry.queryDimensionRecords(dimension, instrument="HSC")
            for r in records:
                for minimal in (True, False):
                    json_str = r.to_json(minimal=minimal)
                    r_json = type(r).from_json(json_str, registry=butler.registry)
                    self.assertEqual(r_json, r)
                    # check with direct method
                    simple = r.to_simple()
                    fromDirect = type(simple).direct(**json.loads(json_str))
                    self.assertEqual(simple, fromDirect)
                    # Also check equality of each of the components as dicts
                    self.assertEqual(r_json.toDict(), r.toDict())

                # check the html representation of records
                r_html = r._repr_html_()
                self.assertTrue(isinstance(r_html, str))
                self.assertIn(dimension, r_html)

    def testWildcardQueries(self):
        """Test that different collection type queries work."""

        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Create some collections
        created = {"collection", "u/user/test", "coll3"}
        for collection in created:
            butler.registry.registerCollection(collection, type=CollectionType.RUN)

        collections = butler.registry.queryCollections()
        self.assertEqual(set(collections), created)

        # Each entry pairs a collection-expression (literal name, ellipsis,
        # glob, regex, or tuple thereof) with the expected matches.
        expressions = (
            ("collection", {"collection"}),
            (..., created),
            ("*", created),
            (("collection", "*"), created),
            ("u/*", {"u/user/test"}),
            (re.compile("u.*"), {"u/user/test"}),
            (re.compile(".*oll.*"), {"collection", "coll3"}),
            ("*oll*", {"collection", "coll3"}),
            ((re.compile(r".*\d$"), "u/user/test"), {"coll3", "u/user/test"}),
            ("*[0-9]", {"coll3"}),
        )
        for expression, expected in expressions:
            result = butler.registry.queryCollections(expression)
            self.assertEqual(set(result), expected)

703 

704 

class SimpleButlerUUIDTestCase(SimpleButlerTestCase):
    """Variant of `SimpleButlerTestCase` that exercises the UUID-based
    datasets manager, importing datasets from a YAML file whose dataset IDs
    are already UUIDs.
    """

    datasetsManager = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
    datasetsImportFile = "datasets-uuid.yaml"
    datasetsIdType = uuid.UUID

715 

716 

class SimpleButlerMixedUUIDTestCase(SimpleButlerTestCase):
    """Variant of `SimpleButlerTestCase` that exercises the UUID-based
    datasets manager while importing datasets from a YAML file containing
    integer dataset IDs (which therefore cannot round-trip).
    """

    datasetsManager = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
    datasetsImportFile = "datasets.yaml"
    datasetsIdType = uuid.UUID

727 

728 

# Entry point: run every test case in this module when executed directly.
if __name__ == "__main__":
    unittest.main()