# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
from collections import defaultdict
import itertools
import os
import re
import unittest

import astropy.time
import sqlalchemy
from typing import Optional, Type, Union

try:
    import numpy as np
except ImportError:
    np = None

from ...core import (
    DataCoordinate,
    DataCoordinateSequence,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    ddl,
    Timespan,
)
from .._registry import (
    CollectionType,
    ConflictingDefinitionError,
    InconsistentDataIdError,
    Registry,
    RegistryConfig,
)
from ..interfaces import MissingCollectionError, ButlerAttributeExistsError


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class.  If a subclass provides a
    value for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
        """
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need only the default configuration should just
        instantiate `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers"]["collections"] = self.collectionsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend
        with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)
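
    # A minimal sketch of how a concrete subclass might wire these hooks
    # together (illustrative only: the SQLite URI and the use of
    # ``Registry.fromConfig`` are assumptions about the surrounding package,
    # not requirements of this harness):
    #
    #     class SQLiteRegistryTests(RegistryTests, unittest.TestCase):
    #
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data", "registry")
    #
    #         def makeRegistry(self) -> Registry:
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"  # private in-memory database
    #             return Registry.fromConfig(config, create=True)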

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )
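
    # For reference, the contract exercised above, as this test understands
    # it: insertDimensionData raises (here an IntegrityError) on any
    # duplicate key, while syncDimensionData returns True when it inserts,
    # returns False when an identical record already exists, and raises
    # ConflictingDefinitionError when an existing record disagrees with the
    # one supplied.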

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes()).names
        )
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes(components=False)).names
        )
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names
        )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
        )
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
        )
        # This pattern matches only a component.  In this case we also
        # return that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
        )
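
    # Summary of the ``components`` tri-state exercised above: False never
    # returns component dataset types, True always includes matching
    # components, and the default (None) includes a component only when the
    # pattern matches no parent dataset type.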

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(parentType, collections=collection,
                                                 instrument="Cam1", detector=1)
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection,
                                         dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            )
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(registry.queryDatasets(
            "bias.wcs",
            collections=collection,
        ))
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2},
            {DatasetRef(childType, dataId) for dataId in dataIds}
        )
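
    # Note that a component dataset type ("bias.wcs") shares its parent's
    # dimensions, which is why the detector data IDs found via the component
    # above are standardized against parentType.dimensions.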

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but
        # that should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time
        # together with a dataset that isn't in the collection and wouldn't
        # itself cause a conflict.  This should also fail without modifying
        # anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [tag1, run2]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1
        # and ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"]
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"]
        )
        # Searching for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Searching for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  It should be found in chain2:
        # directly via run2 at the front of the chain, and again via chain1
        # at the end.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)
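
    # The collection structure built (and then torn down) above, for
    # reference:
    #
    #     chain2 -> [run2, chain1]
    #     chain1 -> [tag1, run2]
    #
    # so a findDataset search against chain2 probes run2, then tag1, then
    # run2 again, and returns the first match.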

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled
        back if an exception propagates out of an inner transaction block
        and is then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)
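
    # The savepoint pattern exercised above, in isolation (a sketch; an
    # exception escaping the inner block rolls back only to the savepoint,
    # not the whole transaction):
    #
    #     with registry.transaction():
    #         ...  # kept; committed at the end of the outer block
    #         try:
    #             with registry.transaction(savepoint=True):
    #                 ...  # rolled back if an exception escapes this block
    #         except sqlalchemy.exc.IntegrityError:
    #             pass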

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins
        to skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twenty", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the different collections;
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dim string works as well as a list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4*3)   # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4*3)   # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input collections
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6*3)   # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to a single visit
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)   # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10 and detector > 1 and 'DummyCam'=instrument").toSet()
        self.assertEqual(len(rows), 2*2)   # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # expression excludes everything
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit > 1000", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, which is not in the requested
        # dimensions but is part of the full expression, so it should work
        # too.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="physical_filter = 'dummy_r'", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)   # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
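
    # makePacker (as used above) returns a DimensionPacker whose pack()
    # encodes a data ID into a single integer and whose unpack() inverts it;
    # the round-trip assertions are the defining property under test, and
    # the inequality check confirms that the two packers use distinct
    # encodings.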

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test; we want
        # "band" in the test so we also have to add physical_filter
        # dimensions
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "band")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "band")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3*4*2)   # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="tract IN (1, 5) AND patch IN (2, 7)", skymap="DummyMap").toSet()
        self.assertEqual(len(rows), 2*2*2)   # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to a single filter
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="band = 'i'").toSet()
        self.assertEqual(len(rows), 3*4*1)   # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # The expression excludes everything; specifying a non-existent
        # skymap is not a fatal error, just operator error.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="skymap = 'Mars'").toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just
        # be a reasonable change to the default dimension definitions - but
        # the test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct the expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already
                # fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.byName(), **dataId2.byName()},
                        graph=graph
                    )
                    for (dataId1, region1), (dataId2, region2)
                    in itertools.product(regions[element1.name].items(), regions[element2.name].items())
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, elementRegions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in elementRegions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.byName()},
                            graph=graph
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)
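
    # commonSkyPix.pixelization.envelope(region) yields half-open index
    # ranges, hence ``range(begin, end)`` above.  An envelope may cover
    # somewhat more than the exact region; the equality assertion holds
    # because the registry's stored skypix overlaps are (presumably)
    # computed from the same envelopes.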

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known
        bands.  This is tricky because band is
        backed by a query against physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(band="i", universe=registry.dimensions),
             DataCoordinate.standardize(band="r", universe=registry.dimensions)]
        )

    def testAttributeManager(self):
        """Test basic functionality of attribute manager.
        """
        # Number of attributes with schema versions in a fresh database:
        # 6 managers with 3 records per manager, plus config for dimensions.
        VERSION_COUNT = 6 * 3 + 1

        registry = self.makeRegistry()
        attributes = registry._attributes

        # check what get() returns for a non-existing key
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # cannot store an empty key or value
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # set value of a non-existing key
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # updating the value of an existing key fails without force=True
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # delete an existing key
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # delete a non-existing key
        self.assertFalse(attributes.delete("non-attr"))

        # store a bunch of keys and get the list back
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets
        from collections in the order given.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
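
    # With findFirst=True the collection list becomes an ordered search
    # path: for each data ID, only the dataset from the first collection
    # containing one is returned.  That is why detector=4 always comes from
    # "imported_r" above: it is the only collection that has it.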

1070 def testQueryResults(self): 

1071 """Test querying for data IDs and then manipulating the QueryResults 

1072 object returned to perform other queries. 

1073 """ 

1074 registry = self.makeRegistry() 

1075 self.loadData(registry, "base.yaml") 

1076 self.loadData(registry, "datasets.yaml") 

1077 bias = registry.getDatasetType("bias") 

1078 flat = registry.getDatasetType("flat") 

1079 # Obtain expected results from methods other than those we're testing 

1080 # here. That includes: 

1081 # - the dimensions of the data IDs we want to query: 

1082 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1083 # - the dimensions of some other data IDs we'll extract from that: 

1084 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1085 # - the data IDs we expect to obtain from the first queries: 

1086 expectedDataIds = DataCoordinateSet( 

1087 { 

1088 DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p, 

1089 universe=registry.dimensions) 

1090 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1091 }, 

1092 graph=expectedGraph, 

1093 hasFull=False, 

1094 hasRecords=False, 

1095 ) 

1096 # - the flat datasets we expect to find from those data IDs, in just 

1097 # one collection (so deduplication is irrelevant): 

1098 expectedFlats = [ 

1099 registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", 

1100 collections="imported_r"), 

1101 registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", 

1102 collections="imported_r"), 

1103 registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", 

1104 collections="imported_r"), 

1105 ] 

1106 # - the data IDs we expect to extract from that: 

1107 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1108 # - the bias datasets we expect to find from those data IDs, after we 

1109 # subset-out the physical_filter dimension, both with duplicates: 

1110 expectedAllBiases = [ 

1111 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1112 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1113 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1114 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1115 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1116 ] 

1117 # - ...and without duplicates: 

1118 expectedDeduplicatedBiases = [ 

1119 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1120 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1121 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1122 ] 

1123 # Test against those expected results, using a "lazy" query for the 

1124 # data IDs (which re-executes that query each time we use it to do 

1125 # something new). 

1126 dataIds = registry.queryDataIds( 

1127 ["detector", "physical_filter"], 

1128 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1129 instrument="Cam1", 

1130 ) 

1131 self.assertEqual(dataIds.graph, expectedGraph) 

1132 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1133 self.assertCountEqual( 

1134 list( 

1135 dataIds.findDatasets( 

1136 flat, 

1137 collections=["imported_r"], 

1138 ) 

1139 ), 

1140 expectedFlats, 

1141 ) 

1142 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1143 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1144 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1145 self.assertCountEqual( 

1146 list( 

1147 subsetDataIds.findDatasets( 

1148 bias, 

1149 collections=["imported_r", "imported_g"], 

1150 findFirst=False 

1151 ) 

1152 ), 

1153 expectedAllBiases 

1154 ) 

1155 self.assertCountEqual( 

1156 list( 

1157 subsetDataIds.findDatasets( 

1158 bias, 

1159 collections=["imported_r", "imported_g"], 

1160 findFirst=True 

1161 ) 

1162 ), expectedDeduplicatedBiases 

1163 ) 

1164 # Materialize the bias dataset queries (only) by putting the results 

1165 # into temporary tables, then repeat those tests. 

        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=False).materialize() as biases:
            self.assertCountEqual(list(biases), expectedAllBiases)
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=True).materialize() as biases:
            self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the data ID subset query, but not the dataset queries.
        with subsetDataIds.materialize() as subsetDataIds:
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False,
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True,
                    )
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize the dataset queries, too.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the original query, but none of the follow-up queries.
        with dataIds.materialize() as dataIds:
            self.assertEqual(dataIds.graph, expectedGraph)
            self.assertEqual(dataIds.toSet(), expectedDataIds)
            self.assertCountEqual(
                list(
                    dataIds.findDatasets(
                        flat,
                        collections=["imported_r"],
                    )
                ),
                expectedFlats,
            )
            subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False,
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True,
                    )
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize just the bias dataset queries.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
            # Materialize the subset data ID query, but not the dataset
            # queries.
            with subsetDataIds.materialize() as subsetDataIds:
                self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
                self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=False,
                        )
                    ),
                    expectedAllBiases,
                )
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=True,
                        )
                    ),
                    expectedDeduplicatedBiases,
                )
                # Materialize the bias dataset queries, too, so now we're
                # materializing every single step.
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=False).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedAllBiases)
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=True).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedDeduplicatedBiases)

    def testEmptyDimensionsQueries(self):
        """Test Query and QueryResults objects in the case where there are no
        dimensions.
        """
        # Set up test data: one dataset type, two runs, one dataset in each.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
        registry.registerDatasetType(schema)
        dataId = DataCoordinate.makeEmpty(registry.dimensions)
        run1 = "run1"
        run2 = "run2"
        registry.registerRun(run1)
        registry.registerRun(run2)
        (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
        (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
        # Query directly for both datasets, then for each one individually.
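        # With findFirst=True the result should come from the first collection
        # in the search order that has a matching dataset, so the order of
        # ``collections`` determines which run's dataset is returned.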
        self.assertCountEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        # Query for data IDs with no dimensions.
        dataIds = registry.queryDataIds([])
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty),
        )
        # Use queried data IDs to find the datasets.
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        # Now materialize the data ID query results and repeat those tests.
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty),
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then subset that to get the empty one.
        # Repeat the above tests starting from that.
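        # (With unique=True, subset should project each data ID onto the
        # requested dimensions and drop duplicates, so projecting onto the
        # empty dimension set leaves exactly one empty data ID.)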
        dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty),
        )
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty),
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then materialize, then subset to get
        # the empty one.  Repeat the tests one more time from that starting
        # point.
        with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
            dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty),
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
            with dataIds.materialize() as dataIds:
                self.assertEqual(
                    dataIds.toSequence(),
                    DataCoordinateSequence([dataId], registry.dimensions.empty),
                )
                self.assertCountEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                    [dataset1, dataset2],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                    [dataset1],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                    [dataset2],
                )

    def testCalibrationCollections(self):
        """Test operations on `~CollectionType.CALIBRATION` collections,
        including `Registry.certify`, `Registry.decertify`, and
        `Registry.findDataset`.
        """
        # Setup: make a Registry and fill it with some datasets in
        # non-calibration collections.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Set up some timestamps.
        t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
        t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
        t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
        t4 = astropy.time.Time('2020-01-01T04:00:00', format="isot", scale="tai")
        t5 = astropy.time.Time('2020-01-01T05:00:00', format="isot", scale="tai")
        allTimespans = [
            Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
        ]
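        # Because the sentinel None appears at both ends of the input list,
        # these combinations include half-unbounded timespans as well as the
        # fully unbounded Timespan(None, None).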
        # Get references to some datasets.
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Register the main calibration collection we'll be working with.
        collection = "Cam1/calibs/default"
        registry.registerCollection(collection, type=CollectionType.CALIBRATION)
        # We cannot associate into a calibration collection, because associate
        # does not provide a timespan.
        with self.assertRaises(TypeError):
            registry.associate(collection, [bias2a])
        # Certify the 2a dataset with [t2, t4) validity.
        registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
        # We should not be able to certify 2b with anything overlapping that
        # window.
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
        # We should be able to certify 3a with a range overlapping that
        # window, because it's for a different detector.
        # We'll certify 3a over [t1, t3).
        registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
        # Now we'll certify 2b and 3b together over [t4, ∞).
        registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
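        # The collection should now contain, per detector:
        #   detector 2: bias2a over [t2, t4), bias2b over [t4, ∞)
        #   detector 3: bias3a over [t1, t3), bias3b over [t4, ∞)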

        # Fetch all associations and check that they are what we expect.
        self.assertCountEqual(
            list(
                registry.queryDatasetAssociations(
                    "bias",
                    collections=[collection, "imported_g", "imported_r"],
                )
            ),
            [
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=1,
                                             collections="imported_g"),
                    collection="imported_g",
                    timespan=None,
                ),
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=4,
                                             collections="imported_r"),
                    collection="imported_r",
                    timespan=None,
                ),
                DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
                DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
                DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
                DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
            ],
        )

        class Ambiguous:
            """Tag class to denote lookups that are expected to be ambiguous.
            """
            pass

        def assertLookup(detector: int, timespan: Timespan,
                         expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None:
            """Local function that asserts that a bias lookup returns the
            given expected result.
            """
            if expected is Ambiguous:
                with self.assertRaises(RuntimeError):
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
            else:
                self.assertEqual(
                    expected,
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan),
                )

        # Systematically test lookups against expected results.
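        # A lookup whose timespan overlaps more than one certified validity
        # range for the same data ID cannot be resolved to a single dataset;
        # those are the Ambiguous cases below.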
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify [t3, t5) for all data IDs, and run the test lookups again.
        # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
        # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
        registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
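        # The certified validity ranges should now be:
        #   detector 2: bias2a over [t2, t3), bias2b over [t5, ∞)
        #   detector 3: bias3a over [t1, t3), bias3b over [t5, ∞)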
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify everything, this time with explicit data IDs; then check
        # that no lookups succeed.
        registry.decertify(
            collection, "bias", Timespan(None, None),
            dataIds=[
                dict(instrument="Cam1", detector=2),
                dict(instrument="Cam1", detector=3),
            ],
        )
        for detector in (2, 3):
            for timespan in allTimespans:
                assertLookup(detector=detector, timespan=timespan, expected=None)
        # Certify bias2a and bias3a over (-∞, ∞) and check that all lookups
        # now return those datasets.
        registry.certify(collection, [bias2a, bias3a], Timespan(None, None))
        for timespan in allTimespans:
            assertLookup(detector=2, timespan=timespan, expected=bias2a)
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
        # Decertify just bias2a over [t2, t4).
        # This should split its single certification row into two (and leave
        # the other existing row, for bias3a, alone).
        registry.decertify(collection, "bias", Timespan(t2, t4),
                           dataIds=[dict(instrument="Cam1", detector=2)])
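        # bias2a is now certified over two disjoint ranges, (-∞, t2) and
        # [t4, ∞): a query timespan overlapping both should be ambiguous, one
        # overlapping exactly one of them should find bias2a, and one
        # overlapping neither should find nothing.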
        for timespan in allTimespans:
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
            overlapsBefore = timespan.overlaps(Timespan(None, t2))
            overlapsAfter = timespan.overlaps(Timespan(t4, None))
            if overlapsBefore and overlapsAfter:
                expected = Ambiguous
            elif overlapsBefore or overlapsAfter:
                expected = bias2a
            else:
                expected = None
            assertLookup(detector=2, timespan=timespan, expected=expected)

    def testIngestTimeQuery(self):
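        """Test that ``ingest_date`` can be used in ``where`` expressions
        passed to `Registry.queryDatasets`.
        """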
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")

        datasets = list(registry.queryDatasets(..., collections=...))
        len0 = len(datasets)
        self.assertGreater(len0, 0)
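
        # Times in the ``where`` expression language are written as T'...'
        # literals.  Everything in the test data was ingested after
        # 2000-01-01, so this cutoff should match every dataset.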
        where = "ingest_date > T'2000-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len1 = len(datasets)
        self.assertEqual(len0, len1)

        # A cutoff far in the future should match nothing; nobody will still
        # be ingesting into this test repository in 2050.
        where = "ingest_date > T'2050-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len2 = len(datasets)
        self.assertEqual(len2, 0)