# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
from collections import defaultdict
import itertools
import os
import re
import unittest

import astropy.time
import sqlalchemy
from typing import Optional, Type, Union

try:
    import numpy as np
except ImportError:
    np = None

from ...core import (
    DataCoordinate,
    DataCoordinateSequence,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    ddl,
    Timespan,
)
from .._registry import (
    CollectionType,
    ConflictingDefinitionError,
    InconsistentDataIdError,
    Registry,
    RegistryConfig,
)
from ..interfaces import MissingCollectionError, ButlerAttributeExistsError


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None

70 """Name of the collections manager class, if subclass provides value for 

71 this member then it overrides name specified in default configuration 

72 (`str`). 

73 """ 


    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
        """
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        Returned instance will be pre-configured based on the values of class

        members, and default-configured for all other parameters. Subclasses

        that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers"]["collections"] = self.collectionsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend
        with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
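            # Pass datastore=None: these tests exercise only Registry
            # metadata, so there are no file artifacts for a datastore.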

            backend.load(datastore=None)


    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
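        # Insert a few rows; fetchOpaqueData with no keyword arguments returns
        # everything, while keyword arguments filter by column equality.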

        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
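        # Insert in dependency order: the visit record refers to instrument
        # and physical_filter, so those records must exist first.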

        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the given
        keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
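        # visit 1 is defined to contain only exposure 1, so a data ID that
        # combines visit 1 with exposure 2 is inconsistent.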

        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
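        # Inserting the same dataset type and data ID into the same run again
        # must be rejected.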

        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be included
        # when components=True.
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes()).names
        )
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes(components=False)).names
        )
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names
        )
        # Use a pattern that can match either parent or components. Again,
        # components are only returned if components=True.
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
        )
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
        )
        # This pattern matches only a component. In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
        )

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(parentType, collections=collection,
                                                 instrument="Cam1", detector=1)
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection,
                                         dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            )
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(registry.queryDatasets(
            "bias.wcs",
            collections=collection,
        ))
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2},
            {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)

        # Associate those into a new collection, then look for them there.

        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED)
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)

        # Disassociate one and verify that we can't find it there anymore...

        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [tag1, run2]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)

        registry.setCollectionChain(chain2, [(run2, "bias"), chain1])
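        # The (run2, "bias") tuple restricts the run2 member of this chain to
        # bias datasets, so flats in run2 are not visible through it (see the
        # flat search below).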

        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"]
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"]
        )

        # Search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.

        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2. That should not be found
        # at the front of chain2, because of the restriction to bias

        # on run2 there, but it should be found at the end of chain1.

        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")

        # But Cam2 (rolled back) and Cam3 (never inserted) should both not
        # exist.

        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True

                    # This should conflict and raise, triggering a rollback

                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
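        # Each visit is built from two exposures: visit 10 from (100, 101),
        # visit 11 from (110, 111), and visit 20 from (200, 201).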

        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])
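        # tagged2 now holds the run2 datasets for exposure 100, the run1
        # datasets for exposure 101, and the run2 datasets for 200 and 201.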


        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6*3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10 and detector > 1").toSet()
        self.assertEqual(len(rows), 2*2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # expression excludes everything
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit > 1000").toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, this is not in the dimensions, but it
        # is a part of the full expression so it should work too.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="physical_filter = 'dummy_r'").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()


        # We need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.

        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )
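        # That gives 10 tracts x 10 patches; only tracts (1, 3, 5) and
        # patches (2, 4, 6, 7) receive datasets below.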


        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "band")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "band")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run).toSet()

        self.assertEqual(len(rows), 3*4*2)  # 3 tracts x 4 patches x 2 filters

        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="tract IN (1, 5) AND patch IN (2, 7)").toSet()
        self.assertEqual(len(rows), 2*2*2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="band = 'i'").toSet()

        self.assertEqual(len(rows), 3*4*1)  # 3 tracts x 4 patches x 1 filter

        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))


        # The expression excludes everything; specifying a non-existent
        # skymap is not a fatal error, just an operator error.

        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="skymap = 'Mars'").toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name of
        # the TopologicalFamily they belong to.  We'll relate all elements in
        # each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do anything
        # useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.byName(), **dataId2.byName()},
                        graph=graph
                    )
                    for (dataId1, region1), (dataId2, region2)
                    in itertools.product(regions[element1.name].items(), regions[element2.name].items())
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, regions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in regions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.byName()},
                            graph=graph
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known
        bands.  This is tricky because band is
        backed by a query against physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
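        # Two physical filters map to band "i", so the query below must
        # deduplicate them into a single "i" data ID.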

        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(band="i", universe=registry.dimensions),
             DataCoordinate.standardize(band="r", universe=registry.dimensions)]
        )

    def testAttributeManager(self):
        """Test basic functionality of attribute manager.
        """
        # number of attributes with schema versions in a fresh database,
        # 6 managers with 3 records per manager, plus config for dimensions
        VERSION_COUNT = 6 * 3 + 1

        registry = self.makeRegistry()
        attributes = registry._attributes

        # check what get() returns for non-existing key
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # cannot store empty key or value
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # set value of non-existing key
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # update value of existing key
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # delete existing key
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # delete non-existing key
        self.assertFalse(attributes.delete("non-attr"))

        # store bunch of keys and get the list back
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets

        from collections in the order given.

1028 """ 

1029 registry = self.makeRegistry() 

1030 self.loadData(registry, "base.yaml") 

1031 self.loadData(registry, "datasets.yaml") 
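        # datasets.yaml provides bias datasets for detectors 1-3 in
        # imported_g and detectors 2-4 in imported_r; the expectations below
        # follow from that layout.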

        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )

    def testQueryResults(self):
        """Test querying for data IDs and then manipulating the QueryResults
        object returned to perform other queries.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        bias = registry.getDatasetType("bias")
        flat = registry.getDatasetType("flat")
        # Obtain expected results from methods other than those we're testing
        # here.  That includes:
        # - the dimensions of the data IDs we want to query:
        expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
        # - the dimensions of some other data IDs we'll extract from that:
        expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
        # - the data IDs we expect to obtain from the first queries:
        expectedDataIds = DataCoordinateSet(
            {
                DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p,
                                           universe=registry.dimensions)
                for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
            },
            graph=expectedGraph,
            hasFull=False,
            hasRecords=False,
        )
        # - the flat datasets we expect to find from those data IDs, in just
        #   one collection (so deduplication is irrelevant):
        expectedFlats = [
            registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2",
                                 collections="imported_r"),
        ]
        # - the data IDs we expect to extract from that:
        expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
        # - the bias datasets we expect to find from those data IDs, after we
        #   subset-out the physical_filter dimension, both with duplicates:
        expectedAllBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # - ...and without duplicates:
        expectedDeduplicatedBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # Test against those expected results, using a "lazy" query for the
        # data IDs (which re-executes that query each time we use it to do
        # something new).
        dataIds = registry.queryDataIds(
            ["detector", "physical_filter"],
            where="detector.purpose = 'SCIENCE'",  # this rejects detector=4
        )
        self.assertEqual(dataIds.graph, expectedGraph)
        self.assertEqual(dataIds.toSet(), expectedDataIds)
        self.assertCountEqual(
            list(
                dataIds.findDatasets(
                    flat,
                    collections=["imported_r"],
                )
            ),
            expectedFlats,
        )
        subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
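        # Subsetting to just "detector" with unique=True collapses the three
        # physical_filter values into one data ID per detector.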

1136 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1137 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1138 self.assertCountEqual( 

1139 list( 

1140 subsetDataIds.findDatasets( 

1141 bias, 

1142 collections=["imported_r", "imported_g"], 

1143 findFirst=False 

1144 ) 

1145 ), 

1146 expectedAllBiases 

1147 ) 

1148 self.assertCountEqual( 

1149 list( 

1150 subsetDataIds.findDatasets( 

1151 bias, 

1152 collections=["imported_r", "imported_g"], 

1153 findFirst=True 

1154 ) 

1155 ), expectedDeduplicatedBiases 

1156 ) 

1157 # Materialize the bias dataset queries (only) by putting the results 

1158 # into temporary tables, then repeat those tests. 

1159 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1160 findFirst=False).materialize() as biases: 

1161 self.assertCountEqual(list(biases), expectedAllBiases) 

1162 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1163 findFirst=True).materialize() as biases: 

1164 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1165 # Materialize the data ID subset query, but not the dataset queries. 

1166 with subsetDataIds.materialize() as subsetDataIds: 

1167 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1168 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1169 self.assertCountEqual( 

1170 list( 

1171 subsetDataIds.findDatasets( 

1172 bias, 

1173 collections=["imported_r", "imported_g"], 

1174 findFirst=False 

1175 ) 

1176 ), 

1177 expectedAllBiases 

1178 ) 

1179 self.assertCountEqual( 

1180 list( 

1181 subsetDataIds.findDatasets( 

1182 bias, 

1183 collections=["imported_r", "imported_g"], 

1184 findFirst=True 

1185 ) 

1186 ), expectedDeduplicatedBiases 

1187 ) 

1188 # Materialize the dataset queries, too. 

1189 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1190 findFirst=False).materialize() as biases: 

1191 self.assertCountEqual(list(biases), expectedAllBiases) 

1192 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1193 findFirst=True).materialize() as biases: 

1194 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

        # Materialize the original query, but none of the follow-up queries.
        with dataIds.materialize() as dataIds:
            self.assertEqual(dataIds.graph, expectedGraph)
            self.assertEqual(dataIds.toSet(), expectedDataIds)
            self.assertCountEqual(
                list(
                    dataIds.findDatasets(
                        flat,
                        collections=["imported_r"],
                    )
                ),
                expectedFlats,
            )
            subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False
                    )
                ),
                expectedAllBiases
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True
                    )
                ), expectedDeduplicatedBiases
            )
            # Materialize just the bias dataset queries.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
            # Materialize the subset data ID query, but not the dataset
            # queries.
            with subsetDataIds.materialize() as subsetDataIds:
                self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
                self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=False
                        )
                    ),
                    expectedAllBiases
                )
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=True
                        )
                    ), expectedDeduplicatedBiases
                )
                # Materialize the bias dataset queries, too, so now we're
                # materializing every single step.
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=False).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedAllBiases)
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=True).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedDeduplicatedBiases)

    def testEmptyDimensionsQueries(self):
        """Test Query and QueryResults objects in the case where there are no
        dimensions.
        """
        # Set up test data: one dataset type, two runs, one dataset in each.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
        registry.registerDatasetType(schema)
        dataId = DataCoordinate.makeEmpty(registry.dimensions)
        run1 = "run1"
        run2 = "run2"
        registry.registerRun(run1)
        registry.registerRun(run2)
        (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
        (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
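        # Datasets with empty data IDs are distinguished only by their run,
        # so with findFirst=True the collection search order decides which
        # one is returned.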

        # Query directly for both datasets, then for each one individually.
        self.assertCountEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2]
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        # Query for data IDs with no dimensions.
        dataIds = registry.queryDataIds([])
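        # Exactly one data ID, the empty one, should come back.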

        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty)
        )
        # Use the queried data IDs to find the datasets.
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        # Now materialize the data ID query results and repeat those tests.
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then subset that to get the empty one.
        # Repeat the above tests starting from that.
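        # subset(..., unique=True) projects every data ID down to the empty
        # dimension set and deduplicates, so exactly one empty data ID
        # remains.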

        dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty)
        )
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then materialize, then subset to get
        # the empty one, and repeat the tests once more.
        with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
            dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
            with dataIds.materialize() as dataIds:
                self.assertEqual(
                    dataIds.toSequence(),
                    DataCoordinateSequence([dataId], registry.dimensions.empty)
                )
                self.assertCountEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                    [dataset1, dataset2],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                    [dataset1],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                    [dataset2],
                )

    def testCalibrationCollections(self):
        """Test operations on `~CollectionType.CALIBRATION` collections,
        including `Registry.certify`, `Registry.decertify`, and
        `Registry.findDataset`.
        """
        # Set up: make a Registry and fill it with datasets in
        # non-calibration collections.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Set up some timestamps.
        t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
        t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
        t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
        t4 = astropy.time.Time('2020-01-01T04:00:00', format="isot", scale="tai")
        t5 = astropy.time.Time('2020-01-01T05:00:00', format="isot", scale="tai")
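        # The duplicated None below makes itertools.combinations emit spans
        # that are unbounded on either side, including the fully unbounded
        # Timespan(None, None).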

        allTimespans = [
            Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
        ]
        # Get references to some datasets.
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Register the main calibration collection we'll be working with.
        collection = "Cam1/calibs/default"
        registry.registerCollection(collection, type=CollectionType.CALIBRATION)
        # Cannot associate into a calibration collection, because associate()
        # provides no validity timespan.
        with self.assertRaises(TypeError):
            registry.associate(collection, [bias2a])
        # Certify the 2a dataset with [t2, t4) validity.
        registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
        # We should not be able to certify 2b with anything overlapping that
        # window.
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
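        # A span beginning at t4 would not conflict: Timespan end bounds are
        # exclusive (hence the [t2, t4) notation), and certifying 2b over
        # [t4, ∞) succeeds below.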

        # We should be able to certify 3a with a range overlapping that
        # window, because it's for a different detector.
        # We'll certify 3a over [t1, t3).
        registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
        # Now we'll certify 2b and 3b together over [t4, ∞).
        registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))

        # Fetch all associations and check that they are what we expect.
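        # Associations in the RUN collections carry no timespan; only the
        # CALIBRATION collection rows do.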

        self.assertCountEqual(
            list(
                registry.queryDatasetAssociations(
                    "bias",
                    collections=[collection, "imported_g", "imported_r"],
                )
            ),
            [
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                    collection="imported_g",
                    timespan=None,
                ),
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
                    collection="imported_r",
                    timespan=None,
                ),
                DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
                DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
                DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
                DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
            ]
        )

        class Ambiguous:
            """Tag class to denote lookups that are expected to be ambiguous.
            """
            pass

        def assertLookup(detector: int, timespan: Timespan,
                         expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None:
            """Local function that asserts that a bias lookup returns the
            given expected result.
            """
            if expected is Ambiguous:
                with self.assertRaises(RuntimeError):
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
            else:
                self.assertEqual(
                    expected,
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
                )
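        # A lookup is ambiguous when the given timespan overlaps the validity
        # ranges of more than one certified dataset for the same data ID;
        # findDataset raises RuntimeError in that case.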

        # Systematically test lookups against expected results.
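        # Recall: bias2a is certified over [t2, t4), bias3a over [t1, t3),
        # and bias2b and bias3b over [t4, ∞).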

        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify [t3, t5) for all data IDs, and do the test lookups again.
        # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
        # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
        registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify everything, this time with explicit data IDs, then check
        # that no lookups succeed.
        registry.decertify(
            collection, "bias", Timespan(None, None),
            dataIds=[
                dict(instrument="Cam1", detector=2),
                dict(instrument="Cam1", detector=3),
            ]
        )
        for detector in (2, 3):
            for timespan in allTimespans:
                assertLookup(detector=detector, timespan=timespan, expected=None)
        # Certify bias2a and bias3a over (-∞, ∞) and check that all lookups
        # now return those.
        registry.certify(collection, [bias2a, bias3a], Timespan(None, None))
        for timespan in allTimespans:
            assertLookup(detector=2, timespan=timespan, expected=bias2a)
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
        # Decertify just bias2 over [t2, t4).
        # This should split a single certification row into two (and leave the
        # other existing row, for bias3a, alone).
        registry.decertify(collection, "bias", Timespan(t2, t4),
                           dataIds=[dict(instrument="Cam1", detector=2)])
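        # After that decertify, bias2a's certification survives only on
        # (-∞, t2) and [t4, ∞): a window overlapping both pieces is
        # ambiguous, a window overlapping exactly one piece finds bias2a,
        # and a window overlapping neither finds nothing.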

        for timespan in allTimespans:
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
            overlapsBefore = timespan.overlaps(Timespan(None, t2))
            overlapsAfter = timespan.overlaps(Timespan(t4, None))
            if overlapsBefore and overlapsAfter:
                expected = Ambiguous
            elif overlapsBefore or overlapsAfter:
                expected = bias2a
            else:
                expected = None
            assertLookup(detector=2, timespan=timespan, expected=expected)