# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
from collections import defaultdict
import itertools
import logging
import os
import re
import unittest

import astropy.time
import sqlalchemy
from typing import Optional, Type, Union

try:
    import numpy as np
except ImportError:
    np = None

from ...core import (
    DataCoordinate,
    DataCoordinateSequence,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    ddl,
    Timespan,
)
from .._registry import (
    CollectionType,
    ConflictingDefinitionError,
    InconsistentDataIdError,
    Registry,
    RegistryConfig,
)
from ..interfaces import MissingCollectionError, ButlerAttributeExistsError

class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class.  If a subclass provides a
    value for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
        """
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need only the default configuration should
        instantiate `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers"]["collections"] = self.collectionsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.
        """
        raise NotImplementedError()
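    # A minimal concrete subclass might look like the sketch below.  This is
    # purely illustrative: the class name, data directory, and the
    # Registry.createFromConfig call are assumptions, not part of this
    # module.
    #
    #     class SqliteRegistryTests(RegistryTests, unittest.TestCase):
    #
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data")
    #
    #         def makeRegistry(self) -> Registry:
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"  # in-memory SQLite database
    #             return Registry.createFromConfig(config)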

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend
        with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))
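    # For reference, the TableSpec above corresponds roughly (modulo
    # backend-specific type names) to DDL along these lines:
    #
    #     CREATE TABLE opaque_table_for_testing (
    #         id BIGINT NOT NULL PRIMARY KEY,
    #         name VARCHAR(16) NOT NULL,
    #         count SMALLINT
    #     );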

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when the definitions are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )
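    # A note on the two insertion APIs exercised above: insertDimensionData
    # raises (here sqlalchemy.exc.IntegrityError) on duplicate records,
    # while syncDimensionData is idempotent.  It returns True when it
    # inserts, False when an identical record already exists, and raises
    # ConflictingDefinitionError when the existing record differs.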

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes()).names
        )
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes(components=False)).names
        )
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names
        )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
        )
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
        )
        # This pattern matches only a component.  In this case we also
        # return that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
        )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")}
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType("temporary", dimensions=["instrument"], storageClass=tempStorageClass,
                                  universe=registry.dimensions)
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should
        # include at least all non-component dataset types (and I don't want
        # to enumerate all of the Exposure components for bias and flat
        # here).
        with self.assertLogs("lsst.daf.butler.registry._registry", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry._registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
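    # Component dataset types use the "<parent>.<component>" naming
    # convention seen above (e.g. "bias.wcs"); the next test exercises
    # lookups that resolve such components through their parent types.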

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(parentType, collections=collection,
                                                 instrument="Cam1", detector=1)
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection,
                                         dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            )
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(registry.queryDatasets(
            "bias.wcs",
            collections=collection,
        ))
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2},
            {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but
        # that should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time
        # along with a dataset that isn't in the collection and won't cause
        # a conflict.  Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # The chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [tag1, run2]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1
        # and ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
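        # The nested search structure is now, conceptually:
        #
        #     chain2 -> [run2, chain1 -> [tag1, run2]]
        #
        # so a lookup in chain2 tries run2 first, then recurses into chain1.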

        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"]
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"]
        )
        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  It should also be found via
        # chain2, which searches run2 directly and again through chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled
        back if an exception propagates out of an inner transaction block
        and is then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)
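    # Taken together, the two tests above pin down the transaction contract:
    # registry.transaction() commits on clean exit, rolls everything back
    # when an exception escapes the block, and with savepoint=True confines
    # the rollback to the inner block.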

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins
        to skymap.
        """
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
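        # For reference, the visit definitions above give this mapping:
        #     visit 10 <- exposures 100, 101
        #     visit 11 <- exposures 110, 111
        #     visit 20 <- exposures 200, 201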

        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure",
                                                                      "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit",
                                                                         "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the two collections;
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dimension string works as well as a list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4*3)   # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4*3)   # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input collections
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6*3)   # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to a single visit
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)   # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # a more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10 and detector > 1 and 'DummyCam'=instrument").toSet()
        self.assertEqual(len(rows), 2*2)   # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # an expression that excludes everything
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit > 1000", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter; it is not among the query dimensions,
        # but it is part of the full expression, so it should work too.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="physical_filter = 'dummy_r'", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)   # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument.
        """
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "band")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "band")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3*4*2)   # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="tract IN (1, 5) AND patch IN (2, 7)",
                                     skymap="DummyMap").toSet()
        self.assertEqual(len(rows), 2*2*2)   # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to a single filter
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="band = 'i'").toSet()
        self.assertEqual(len(rows), 3*4*1)   # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # The expression excludes everything; specifying a non-existing
        # skymap is not a fatal error, just operator error.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="skymap = 'Mars'").toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just
        # be a reasonable change to the default dimension definitions - but
        # the test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct the expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already
                # fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.byName(), **dataId2.byName()},
                        graph=graph
                    )
                    for (dataId1, region1), (dataId2, region2)
                    in itertools.product(regions[element1.name].items(), regions[element2.name].items())
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, elementRegions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in elementRegions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.byName()},
                            graph=graph
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known
        bands.  This is tricky because band is backed by a query against
        physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(band="i", universe=registry.dimensions),
             DataCoordinate.standardize(band="r", universe=registry.dimensions)]
        )

    def testAttributeManager(self):
        """Test basic functionality of the attribute manager.
        """
        # Number of attributes with schema versions in a fresh database:
        # 6 managers with 3 records per manager, plus config for dimensions.
        VERSION_COUNT = 6 * 3 + 1

        registry = self.makeRegistry()
        attributes = registry._attributes

        # check what get() returns for a non-existing key
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # cannot store an empty key or value
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # set the value of a non-existing key
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # updating the value of an existing key requires force=True
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # delete an existing key
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # delete a non-existing key
        self.assertFalse(attributes.delete("non-attr"))

        # store a bunch of keys and get the list back
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets
        from collections in the order given.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
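        # findFirst=True keeps, for each data ID, only the dataset from the
        # first collection in the given search order that contains one:
        # detector=1 exists only in imported_g, so it is returned regardless
        # of the order, while detectors 2 and 3 follow the order given.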

        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )

1106 def testQueryResults(self): 

1107 """Test querying for data IDs and then manipulating the QueryResults 

1108 object returned to perform other queries. 

1109 """ 

1110 registry = self.makeRegistry() 

1111 self.loadData(registry, "base.yaml") 

1112 self.loadData(registry, "datasets.yaml") 

1113 bias = registry.getDatasetType("bias") 

1114 flat = registry.getDatasetType("flat") 

1115 # Obtain expected results from methods other than those we're testing 

1116 # here. That includes: 

1117 # - the dimensions of the data IDs we want to query: 

1118 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1119 # - the dimensions of some other data IDs we'll extract from that: 

1120 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1121 # - the data IDs we expect to obtain from the first queries: 

1122 expectedDataIds = DataCoordinateSet( 

1123 { 

1124 DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p, 

1125 universe=registry.dimensions) 

1126 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1127 }, 

1128 graph=expectedGraph, 

1129 hasFull=False, 

1130 hasRecords=False, 

1131 ) 

1132 # - the flat datasets we expect to find from those data IDs, in just 

1133 # one collection (so deduplication is irrelevant): 

1134 expectedFlats = [ 

1135 registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", 

1136 collections="imported_r"), 

1137 registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", 

1138 collections="imported_r"), 

1139 registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", 

1140 collections="imported_r"), 

1141 ] 

1142 # - the data IDs we expect to extract from that: 

1143 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1144 # - the bias datasets we expect to find from those data IDs, after we 

1145 # subset-out the physical_filter dimension, both with duplicates: 

1146 expectedAllBiases = [ 

1147 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1148 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1149 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1150 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1151 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1152 ] 

1153 # - ...and without duplicates: 

1154 expectedDeduplicatedBiases = [ 

1155 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1156 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1157 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1158 ] 

1159 # Test against those expected results, using a "lazy" query for the 

1160 # data IDs (which re-executes that query each time we use it to do 

1161 # something new). 

1162 dataIds = registry.queryDataIds( 

1163 ["detector", "physical_filter"], 

1164 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1165 instrument="Cam1", 

1166 ) 

1167 self.assertEqual(dataIds.graph, expectedGraph) 

1168 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1169 self.assertCountEqual( 

1170 list( 

1171 dataIds.findDatasets( 

1172 flat, 

1173 collections=["imported_r"], 

1174 ) 

1175 ), 

1176 expectedFlats, 

1177 ) 

1178 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1179 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1180 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1181 self.assertCountEqual( 

1182 list( 

1183 subsetDataIds.findDatasets( 

1184 bias, 

1185 collections=["imported_r", "imported_g"], 

1186 findFirst=False 

1187 ) 

1188 ), 

1189 expectedAllBiases 

1190 ) 

1191 self.assertCountEqual( 

1192 list( 

1193 subsetDataIds.findDatasets( 

1194 bias, 

1195 collections=["imported_r", "imported_g"], 

1196 findFirst=True 

1197 ) 

1198 ), expectedDeduplicatedBiases 

1199 ) 

1200 # Materialize the bias dataset queries (only) by putting the results 

1201 # into temporary tables, then repeat those tests. 

        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=False).materialize() as biases:
            self.assertCountEqual(list(biases), expectedAllBiases)
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=True).materialize() as biases:
            self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the data ID subset query, but not the dataset queries.
        with subsetDataIds.materialize() as subsetDataIds:
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False,
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True,
                    )
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize the dataset queries, too.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the original query, but none of the follow-up queries.
        with dataIds.materialize() as dataIds:
            self.assertEqual(dataIds.graph, expectedGraph)
            self.assertEqual(dataIds.toSet(), expectedDataIds)
            self.assertCountEqual(
                list(
                    dataIds.findDatasets(
                        flat,
                        collections=["imported_r"],
                    )
                ),
                expectedFlats,
            )
            subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False,
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True,
                    )
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize just the bias dataset queries.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
            # Materialize the subset data ID query, but not the dataset
            # queries.
            with subsetDataIds.materialize() as subsetDataIds:
                self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
                self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=False,
                        )
                    ),
                    expectedAllBiases,
                )
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=True,
                        )
                    ),
                    expectedDeduplicatedBiases,
                )
                # Materialize the bias dataset queries, too, so now we're
                # materializing every single step.
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=False).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedAllBiases)
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=True).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedDeduplicatedBiases)

    def testEmptyDimensionsQueries(self):
        """Test Query and QueryResults objects in the case where there are no
        dimensions.
        """
        # Set up test data: one dataset type, two runs, one dataset in each.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
        registry.registerDatasetType(schema)
        dataId = DataCoordinate.makeEmpty(registry.dimensions)
        run1 = "run1"
        run2 = "run2"
        registry.registerRun(run1)
        registry.registerRun(run2)
        (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
        (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
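        # (Note for the assertions below: with findFirst=True, the search
        # order follows the ``collections`` argument, so [run1, run2] should
        # yield dataset1 while [run2, run1] should yield dataset2.)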

        # Query directly for both datasets at once, and then for each one
        # individually.
        self.assertCountEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        # Query for data IDs with no dimensions.
        dataIds = registry.queryDataIds([])
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty)
        )
        # Use the queried data IDs to find the datasets.
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        # Now materialize the data ID query results and repeat those tests.
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )

        # Query for non-empty data IDs, then subset that to get the empty one.
        # Repeat the above tests starting from that.
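        # (Subsetting to the empty dimension graph maps every row to the same
        # empty data ID; unique=True is what collapses those duplicates down
        # to the single expected entry.)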

        dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty)
        )
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then materialize, then subset to get
        # the empty one.  Repeat again.
        with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
            dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
            with dataIds.materialize() as dataIds:
                self.assertEqual(
                    dataIds.toSequence(),
                    DataCoordinateSequence([dataId], registry.dimensions.empty)
                )
                self.assertCountEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                    [dataset1, dataset2],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                    [dataset1],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                    [dataset2],
                )

    def testCalibrationCollections(self):
        """Test operations on `~CollectionType.CALIBRATION` collections,
        including `Registry.certify`, `Registry.decertify`, and
        `Registry.findDataset`.
        """
        # Set up: make a Registry and fill it with some datasets in
        # non-calibration collections.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Set up some timestamps.
        t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
        t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
        t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
        t4 = astropy.time.Time('2020-01-01T04:00:00', format="isot", scale="tai")
        t5 = astropy.time.Time('2020-01-01T05:00:00', format="isot", scale="tai")
        allTimespans = [
            Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
        ]
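        # (The endpoint list above repeats ``None`` on purpose: taken
        # pairwise by ``combinations``, it produces half-unbounded spans such
        # as Timespan(None, t1) and Timespan(t1, None) as well as the fully
        # unbounded Timespan(None, None).)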

        # Get references to some datasets.
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Register the main calibration collection we'll be working with.
        collection = "Cam1/calibs/default"
        registry.registerCollection(collection, type=CollectionType.CALIBRATION)
        # Cannot associate into a calibration collection (no timespan).
        with self.assertRaises(TypeError):
            registry.associate(collection, [bias2a])
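        # (Dataset membership in a CALIBRATION collection always carries a
        # validity timespan, which plain associate() cannot supply; certify()
        # below is the calibration-collection counterpart.)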

        # Certify the 2a dataset with [t2, t4) validity.
        registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
        # We should not be able to certify 2b with anything overlapping that
        # window.
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
        # We should be able to certify 3a with a range overlapping that
        # window, because it's for a different detector.
        # We'll certify 3a over [t1, t3).
        registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
        # Now we'll certify 2b and 3b together over [t4, ∞).
        registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
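        # Summary of the validity ranges certified so far, for reference in
        # the lookups below:
        #   detector 2: bias2a over [t2, t4), bias2b over [t4, ∞)
        #   detector 3: bias3a over [t1, t3), bias3b over [t4, ∞)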

        # Fetch all associations and check that they are what we expect.
        self.assertCountEqual(
            list(
                registry.queryDatasetAssociations(
                    "bias",
                    collections=[collection, "imported_g", "imported_r"],
                )
            ),
            [
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=1,
                                             collections="imported_g"),
                    collection="imported_g",
                    timespan=None,
                ),
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=4,
                                             collections="imported_r"),
                    collection="imported_r",
                    timespan=None,
                ),
                DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
                DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
                DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
                DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
            ]
        )

        class Ambiguous:
            """Tag class to denote lookups that are expected to be ambiguous.
            """
            pass

        def assertLookup(detector: int, timespan: Timespan,
                         expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None:
            """Local function that asserts that a bias lookup returns the
            given expected result.
            """
            if expected is Ambiguous:
                with self.assertRaises(RuntimeError):
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
            else:
                self.assertEqual(
                    expected,
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
                )

        # Systematically test lookups against expected results.
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify [t3, t5) for all data IDs, and run the test lookups
        # again.  This should truncate bias2a to [t2, t3), leave bias3a
        # unchanged at [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
        registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))

        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify everything, this time with explicit data IDs, then check
        # that no lookups succeed.
        registry.decertify(
            collection, "bias", Timespan(None, None),
            dataIds=[
                dict(instrument="Cam1", detector=2),
                dict(instrument="Cam1", detector=3),
            ]
        )
        for detector in (2, 3):
            for timespan in allTimespans:
                assertLookup(detector=detector, timespan=timespan, expected=None)
        # Certify bias2a and bias3a over (-∞, ∞), and check that all lookups
        # now return those.
        registry.certify(collection, [bias2a, bias3a], Timespan(None, None))
        for timespan in allTimespans:
            assertLookup(detector=2, timespan=timespan, expected=bias2a)
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
        # Decertify just bias2a over [t2, t4).
        # This should split its single certification row into two (and leave
        # the other existing row, for bias3a, alone).
        registry.decertify(collection, "bias", Timespan(t2, t4),
                           dataIds=[dict(instrument="Cam1", detector=2)])
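        # (Concretely, bias2a's single (-∞, ∞) certification should now be
        # two rows, (-∞, t2) and [t4, ∞); the overlap logic below checks
        # every candidate timespan against exactly that layout.)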

        for timespan in allTimespans:
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
            overlapsBefore = timespan.overlaps(Timespan(None, t2))
            overlapsAfter = timespan.overlaps(Timespan(t4, None))
            if overlapsBefore and overlapsAfter:
                expected = Ambiguous
            elif overlapsBefore or overlapsAfter:
                expected = bias2a
            else:
                expected = None
            assertLookup(detector=2, timespan=timespan, expected=expected)

    def testIngestTimeQuery(self):
        """Test ``where`` expressions that filter datasets on ingest_date."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")

        datasets = list(registry.queryDatasets(..., collections=...))
        len0 = len(datasets)
        self.assertGreater(len0, 0)

        where = "ingest_date > T'2000-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len1 = len(datasets)
        self.assertEqual(len0, len1)
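        # (Every dataset here was just ingested by loadData, i.e. well after
        # 2000-01-01, which is why the cut above keeps everything; T'...' is
        # the query expression language's time-literal syntax, also used in
        # testTimespanQueries below.)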

        # Pick a date far enough in the future that nothing can have been
        # ingested after it; this query should therefore match no datasets.
        where = "ingest_date > T'2050-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len2 = len(datasets)
        self.assertEqual(len2, 0)

    def testTimespanQueries(self):
        """Test query expressions involving timespans.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")
        # All visits in the database; a mapping from visit ID to timespan.
        visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
        # Just those IDs, sorted (which is also temporal sorting, because HSC
        # visit IDs increase monotonically with time).
        ids = sorted(visits.keys())
        self.assertGreater(len(ids), 20)
        # Pick some quasi-random indexes into `ids` to play with.
        i1 = int(len(ids)*0.1)
        i2 = int(len(ids)*0.3)
        i3 = int(len(ids)*0.6)
        i4 = int(len(ids)*0.8)
        # Extract some times from those: just before the beginning of visit
        # i1 (but after the end of the previous visit), exactly at the
        # beginning of visit i2, just after the beginning of visit i3 (and
        # before its end), and exactly at the end of visit i4.
        t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
        self.assertGreater(t1, visits[ids[i1 - 1]].end)
        t2 = visits[ids[i2]].begin
        t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
        self.assertLess(t3, visits[ids[i3]].end)
        t4 = visits[ids[i4]].end
        # Make sure those are actually in order.
        self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
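        # Sanity-check the half-open interval semantics the assertions below
        # rely on (a quick sketch: Timespan begins are inclusive and ends are
        # exclusive, so spans that merely touch should not overlap).
        self.assertTrue(Timespan(t1, t3).overlaps(Timespan(t2, t4)))
        self.assertFalse(Timespan(t1, t2).overlaps(Timespan(t2, t3)))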

        bind = {
            "t1": t1,
            "t2": t2,
            "t3": t3,
            "t4": t4,
            "ts23": Timespan(t2, t3),
        }
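        # (``bind`` maps identifiers used in the ``where`` strings below to
        # Python values, so times and timespans can be passed in without
        # formatting them into the expression text; ts23 stands in for the
        # literal pair (t2, t3).)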

        def query(where):
            """Helper function that queries for visit data IDs and returns
            results as a sorted, deduplicated list of visit IDs.
            """
            return sorted(
                {
                    dataId["visit"]
                    for dataId in registry.queryDataIds(
                        "visit", instrument="HSC", bind=bind, where=where
                    )
                }
            )

        # Try a bunch of timespan queries, mixing up the bounds themselves,
        # where they appear in the expression, and how we get the timespan
        # into the expression.

        # t1 is before the start of i1, so this should not include i1.
        self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
        # t2 is exactly at the start of i2, but ends are exclusive, so these
        # should not include i2.
        self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
        self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
        # t3 is in the middle of i3, so this should include i3.
        self.assertEqual(ids[i2:i3 + 1], query("visit.timespan OVERLAPS ts23"))
        # This one should not include i3, by the same reasoning.
        self.assertEqual(ids[i3 + 1:], query("visit.timespan > (t1, t3)"))
        # t4 is exactly at the end of i4, so this should include i4.
        self.assertEqual(ids[i3:i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
        # i4's upper bound of t4 is exclusive, so this should not include i4.
        self.assertEqual(ids[i4 + 1:], query("visit.timespan OVERLAPS (t4, NULL)"))

        # Now some timespan vs. time scalar queries.
        self.assertEqual(ids[:i2], query("visit.timespan < t2"))
        self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
        self.assertEqual(ids[i3 + 1:], query("visit.timespan > t3"))
        self.assertEqual(ids[i3 + 1:], query("t3 < visit.timespan"))
        self.assertEqual(ids[i3:i3 + 1], query("visit.timespan OVERLAPS t3"))
        self.assertEqual(ids[i3:i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))

        # Empty timespans should not overlap anything.
        self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))