# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
from collections import defaultdict
import itertools
import logging
import os
import re
import unittest

import astropy.time
import sqlalchemy
from typing import Optional, Type, Union

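# numpy is optional here; the numpy-specific test below is skipped when it
# is unavailable.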
try:
    import numpy as np
except ImportError:
    np = None

from ...core import (
    DataCoordinate,
    DataCoordinateSequence,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    ddl,
    Timespan,
)
from .._registry import (
    CollectionSummary,
    CollectionType,
    ConflictingDefinitionError,
    InconsistentDataIdError,
    OrphanedRecordError,
    Registry,
    RegistryConfig,
)
from .._exceptions import MissingCollectionError
from ..interfaces import ButlerAttributeExistsError


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

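    # How a concrete test case is expected to wire this mixin up -- a minimal
    # sketch only, not part of the real test suite.  The subclass name, data
    # directory, and in-memory SQLite URI below are illustrative, and the
    # sketch assumes the usual `Registry.createFromConfig` entry point:
    #
    #     class SqliteRegistryTests(RegistryTests, unittest.TestCase):
    #
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data")
    #
    #         def makeRegistry(self) -> Registry:
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"  # private in-memory database
    #             return Registry.createFromConfig(config)
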
    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
        """
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers"]["collections"] = self.collectionsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend
        with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
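        # datastore=None: only registry content (dimension records, datasets,
        # collections) is imported; no file artifacts are transferred.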
        backend.load(datastore=None)

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
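        # fetchOpaqueData with no keyword arguments returns every row;
        # keyword arguments act as equality constraints, ANDed together.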
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the given
        keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
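        # Visit 1 is defined above to contain only exposure 1, so a data ID
        # that pairs visit 1 with exposure 2 is inconsistent and must fail.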
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be included
        # when components=True.
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes()).names
        )
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes(components=False)).names
        )
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names
        )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
        )
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
        )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
        )
        # Add a dataset type using a StorageClass that we'll then remove; check
        # that this does not affect our ability to query for dataset types
        # (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")}
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType("temporary", dimensions=["instrument"], storageClass=tempStorageClass,
                                  universe=registry.dimensions)
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registry._registry", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next line
        # fails (i.e. "temporary.data" _is_ in everything.names), it means
        # this part of the test isn't doing anything, because the _unregister
        # call above isn't simulating the real-life case we want it to
        # simulate, in which different versions of daf_butler in entirely
        # different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry._registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(parentType, collections=collection,
                                                 instrument="Cam1", detector=1)
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection,
                                         dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            )
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(registry.queryDatasets(
            "bias.wcs",
            collections=collection,
        ))
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2},
            {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
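        # A CHAINED collection is an ordered search path: lookups proceed
        # through its child collections in order, recursing into nested
        # chains, and return the first match.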
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [tag1, run2]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"]
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"]
        )
        # Searching for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Searching for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
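        # Mapping defined above: exposures 100/101 -> visit 10,
        # 110/111 -> visit 11, 200/201 -> visit 20.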
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections;
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
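        # Dimension packers encode a full data ID into a single integer and
        # back again; the two packers below use different dimensions
        # (visit+detector vs. exposure+detector), so their packed values for
        # the same data ID must differ.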
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6*3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10 and detector > 1 and 'DummyCam'=instrument").toSet()
        self.assertEqual(len(rows), 2*2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # expression excludes everything
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit > 1000", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter; it is not in the dimensions, but it is
        # part of the full expression, so it should work too.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="physical_filter = 'dummy_r'", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for the test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "band")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "band")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3*4*2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="tract IN (1, 5) AND patch IN (2, 7)", skymap="DummyMap").toSet()
        self.assertEqual(len(rows), 2*2*2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="band = 'i'").toSet()
        self.assertEqual(len(rows), 3*4*1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # expression excludes everything; specifying a non-existing skymap is
        # not a fatal error, it's an operator error
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="skymap = 'Mars'").toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name of
        # the TopologicalFamily they belong to.  We'll relate all elements in
        # each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do anything
        # useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.byName(), **dataId2.byName()},
                        graph=graph
                    )
                    for (dataId1, region1), (dataId2, region2)
                    in itertools.product(regions[element1.name].items(), regions[element2.name].items())
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
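        # commonSkyPix is the dimension universe's shared sky pixelization,
        # used as the intermediary for spatial joins; envelope(region) yields
        # [begin, end) ranges of pixel indices that may overlap the region.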
        for elementName, elementRegions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in elementRegions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.byName()},
                            graph=graph
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known
        bands.  This is tricky because band is
        backed by a query against physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(band="i", universe=registry.dimensions),
             DataCoordinate.standardize(band="r", universe=registry.dimensions)]
        )

    def testAttributeManager(self):
        """Test basic functionality of attribute manager.
        """
        # number of attributes with schema versions in a fresh database,
        # 6 managers with 3 records per manager, plus config for dimensions
        VERSION_COUNT = 6 * 3 + 1

        registry = self.makeRegistry()
        attributes = registry._managers.attributes

        # check what get() returns for non-existing key
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # cannot store empty key or value
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # set value of non-existing key
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # update value of existing key
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # delete existing key
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # delete non-existing key
        self.assertFalse(attributes.delete("non-attr"))

        # store bunch of keys and get the list back
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets
        from collections in the order given.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
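        # datasets.yaml provides bias datasets for detectors 1-3 in
        # "imported_g" and detectors 2-4 in "imported_r" (as the lookups
        # below assume).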

1099 self.assertCountEqual( 

1100 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1101 [ 

1102 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1103 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1104 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1105 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1106 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1107 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1108 ] 

1109 ) 

1110 self.assertCountEqual( 

1111 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], 

1112 findFirst=True)), 

1113 [ 

1114 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1115 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1116 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1117 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1118 ] 

1119 ) 

1120 self.assertCountEqual( 

1121 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], 

1122 findFirst=True)), 

1123 [ 

1124 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1125 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1126 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1127 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1128 ] 

1129 ) 

1130 

1131 def testQueryResults(self): 

1132 """Test querying for data IDs and then manipulating the QueryResults 

1133 object returned to perform other queries. 

1134 """ 

1135 registry = self.makeRegistry() 

1136 self.loadData(registry, "base.yaml") 

1137 self.loadData(registry, "datasets.yaml") 

1138 bias = registry.getDatasetType("bias") 

1139 flat = registry.getDatasetType("flat") 

1140 # Obtain expected results from methods other than those we're testing 

1141 # here. That includes: 

1142 # - the dimensions of the data IDs we want to query: 

1143 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1144 # - the dimensions of some other data IDs we'll extract from that: 

1145 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1146 # - the data IDs we expect to obtain from the first queries: 

1147 expectedDataIds = DataCoordinateSet( 

1148 { 

1149 DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p, 

1150 universe=registry.dimensions) 

1151 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1152 }, 

1153 graph=expectedGraph, 

1154 hasFull=False, 

1155 hasRecords=False, 

1156 ) 

1157 # - the flat datasets we expect to find from those data IDs, in just 

1158 # one collection (so deduplication is irrelevant): 

1159 expectedFlats = [ 

1160 registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", 

1161 collections="imported_r"), 

1162 registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", 

1163 collections="imported_r"), 

1164 registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", 

1165 collections="imported_r"), 

1166 ] 

1167 # - the data IDs we expect to extract from that: 

1168 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1169 # - the bias datasets we expect to find from those data IDs, after we 

1170 # subset-out the physical_filter dimension, both with duplicates: 

        expectedAllBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # - ...and without duplicates:
        expectedDeduplicatedBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # Test against those expected results, using a "lazy" query for the
        # data IDs (which re-executes that query each time we use it to do
        # something new).
        dataIds = registry.queryDataIds(
            ["detector", "physical_filter"],
            where="detector.purpose = 'SCIENCE'",  # this rejects detector=4
            instrument="Cam1",
        )
        self.assertEqual(dataIds.graph, expectedGraph)
        self.assertEqual(dataIds.toSet(), expectedDataIds)
        self.assertCountEqual(
            list(
                dataIds.findDatasets(
                    flat,
                    collections=["imported_r"],
                )
            ),
            expectedFlats,
        )
        subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
        self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
        self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=False,
                )
            ),
            expectedAllBiases,
        )
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=True,
                )
            ),
            expectedDeduplicatedBiases,
        )

        # Materialize the bias dataset queries (only) by putting the results
        # into temporary tables, then repeat those tests.
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=False).materialize() as biases:
            self.assertCountEqual(list(biases), expectedAllBiases)
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=True).materialize() as biases:
            self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the data ID subset query, but not the dataset queries.
        with subsetDataIds.materialize() as subsetDataIds:
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False,
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True,
                    )
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize the dataset queries, too.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the original query, but none of the follow-up queries.
        with dataIds.materialize() as dataIds:
            self.assertEqual(dataIds.graph, expectedGraph)
            self.assertEqual(dataIds.toSet(), expectedDataIds)
            self.assertCountEqual(
                list(
                    dataIds.findDatasets(
                        flat,
                        collections=["imported_r"],
                    )
                ),
                expectedFlats,
            )
            subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False,
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True,
                    )
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize just the bias dataset queries.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
            # Materialize the subset data ID query, but not the dataset
            # queries.
            with subsetDataIds.materialize() as subsetDataIds:
                self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
                self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=False,
                        )
                    ),
                    expectedAllBiases,
                )
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=True,
                        )
                    ),
                    expectedDeduplicatedBiases,
                )
                # Materialize the bias dataset queries, too, so now we're
                # materializing every single step.
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=False).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedAllBiases)
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=True).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedDeduplicatedBiases)

    def testEmptyDimensionsQueries(self):
        """Test Query and QueryResults objects in the case where there are no
        dimensions.
        """
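        # With no dimensions there is exactly one possible data ID: the
        # empty one, shared by every dataset of a dimensionless dataset
        # type.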

        # Set up test data: one dataset type, two runs, one dataset in each.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
        registry.registerDatasetType(schema)
        dataId = DataCoordinate.makeEmpty(registry.dimensions)
        run1 = "run1"
        run2 = "run2"
        registry.registerRun(run1)
        registry.registerRun(run2)
        (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
        (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
        # Query directly for both datasets at once, then for each one
        # individually.

        self.assertCountEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        # Query for data IDs with no dimensions.
        dataIds = registry.queryDataIds([])
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty),
        )
        # Use queried data IDs to find the datasets.
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        # Now materialize the data ID query results and repeat those tests.
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty),
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then subset that to get the empty one.
        # Repeat the above tests starting from that.
        dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty),
        )
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty),
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then materialize, then subset to get
        # the empty one.  Repeat again.
        with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
            dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty),
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
            with dataIds.materialize() as dataIds:
                self.assertEqual(
                    dataIds.toSequence(),
                    DataCoordinateSequence([dataId], registry.dimensions.empty),
                )
                self.assertCountEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                    [dataset1, dataset2],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                    [dataset1],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                    [dataset2],
                )

    def testCalibrationCollections(self):
        """Test operations on `~CollectionType.CALIBRATION` collections,
        including `Registry.certify`, `Registry.decertify`, and
        `Registry.findDataset`.
        """
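        # In a CALIBRATION collection each dataset carries a validity
        # timespan: certify adds such associations, decertify removes or
        # truncates them, and findDataset needs a timespan to search with.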

        # Setup - make a Registry, fill it with some datasets in
        # non-calibration collections.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Set up some timestamps.
        t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
        t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
        t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
        t4 = astropy.time.Time('2020-01-01T04:00:00', format="isot", scale="tai")
        t5 = astropy.time.Time('2020-01-01T05:00:00', format="isot", scale="tai")
        allTimespans = [
            Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
        ]
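        # Note that None appears at both ends of the input sequence, so the
        # combinations include half-bounded timespans like (None, t2) and
        # (t3, None) as well as the fully unbounded (None, None).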

        # Get references to some datasets.
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Register the main calibration collection we'll be working with.
        collection = "Cam1/calibs/default"
        registry.registerCollection(collection, type=CollectionType.CALIBRATION)
        # Cannot associate into a calibration collection (no timespan).
        with self.assertRaises(TypeError):
            registry.associate(collection, [bias2a])
        # Certify 2a dataset with [t2, t4) validity.
        registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
        # We should not be able to certify 2b with anything overlapping that
        # window.
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
        # We should be able to certify 3a with a range overlapping that window,
        # because it's for a different detector.
        # We'll certify 3a over [t1, t3).
        registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
        # Now we'll certify 2b and 3b together over [t4, ∞).
        registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))

        # Fetch all associations and check that they are what we expect.
        self.assertCountEqual(
            list(
                registry.queryDatasetAssociations(
                    "bias",
                    collections=[collection, "imported_g", "imported_r"],
                )
            ),
            [
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                    collection="imported_g",
                    timespan=None,
                ),
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
                    collection="imported_r",
                    timespan=None,
                ),
                DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
                DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
                DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
                DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
            ]
        )

        class Ambiguous:
            """Tag class to denote lookups that are expected to be ambiguous.
            """
            pass

        def assertLookup(detector: int, timespan: Timespan,
                         expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None:
            """Local function that asserts that a bias lookup returns the given
            expected result.
            """
            if expected is Ambiguous:
                with self.assertRaises(RuntimeError):
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
            else:
                self.assertEqual(
                    expected,
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
                )

        # Systematically test lookups against expected results.
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify [t3, t5) for all data IDs, and run the test lookups
        # again.  This should truncate bias2a to [t2, t3), leave bias3a
        # unchanged at [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
        registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))

        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify everything, this time with explicit data IDs, then check
        # that no lookups succeed.
        registry.decertify(
            collection, "bias", Timespan(None, None),
            dataIds=[
                dict(instrument="Cam1", detector=2),
                dict(instrument="Cam1", detector=3),
            ]
        )
        for detector in (2, 3):
            for timespan in allTimespans:
                assertLookup(detector=detector, timespan=timespan, expected=None)

        # Certify bias2a and bias3a over (-∞, ∞), and check that all lookups
        # return those.
        registry.certify(collection, [bias2a, bias3a], Timespan(None, None))

        for timespan in allTimespans:
            assertLookup(detector=2, timespan=timespan, expected=bias2a)
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
        # Decertify just bias2 over [t2, t4).
        # This should split a single certification row into two (and leave the
        # other existing row, for bias3a, alone).
        registry.decertify(collection, "bias", Timespan(t2, t4),
                           dataIds=[dict(instrument="Cam1", detector=2)])
        for timespan in allTimespans:
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
            overlapsBefore = timespan.overlaps(Timespan(None, t2))
            overlapsAfter = timespan.overlaps(Timespan(t4, None))
            if overlapsBefore and overlapsAfter:
                expected = Ambiguous
            elif overlapsBefore or overlapsAfter:
                expected = bias2a
            else:
                expected = None
            assertLookup(detector=2, timespan=timespan, expected=expected)

    def testIngestTimeQuery(self):
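        """Test that the ``ingest_date`` dataset field can be used in query
        ``where`` expressions.
        """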

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")

        datasets = list(registry.queryDatasets(..., collections=...))
        len0 = len(datasets)
        self.assertGreater(len0, 0)
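        # Everything above was ingested moments ago, so a lower bound in the
        # past should keep all of it.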

        where = "ingest_date > T'2000-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len1 = len(datasets)
        self.assertEqual(len0, len1)

        # No one will still be running this version of the software in 30
        # years, so a lower bound in the far future should reject everything.
        where = "ingest_date > T'2050-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len2 = len(datasets)
        self.assertEqual(len2, 0)

    def testTimespanQueries(self):
        """Test query expressions involving timespans.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # All visits in the database; mapping from visit ID to timespan.
        visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
        # Just those IDs, sorted (which is also temporal sorting, because HSC
        # visit IDs are monotonically increasing).
        ids = sorted(visits.keys())

        self.assertGreater(len(ids), 20)
        # Pick some quasi-random indexes into `ids` to play with.
        i1 = int(len(ids)*0.1)
        i2 = int(len(ids)*0.3)
        i3 = int(len(ids)*0.6)
        i4 = int(len(ids)*0.8)

        # Extract some times from those: just before the beginning of i1
        # (which should be after the end of the previous visit), exactly the
        # beginning of i2, just after the beginning of i3 (and before its
        # end), and the exact end of i4.
        t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
        self.assertGreater(t1, visits[ids[i1 - 1]].end)
        t2 = visits[ids[i2]].begin
        t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
        self.assertLess(t3, visits[ids[i3]].end)
        t4 = visits[ids[i4]].end
        # Make sure those are actually in order.
        self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))

        bind = {
            "t1": t1,
            "t2": t2,
            "t3": t3,
            "t4": t4,
            "ts23": Timespan(t2, t3),
        }
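        # The bind mapping lets the `where` strings below refer to these
        # Python values (including the full Timespan, ts23) by name instead
        # of embedding literals in the expressions.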

        def query(where):
            """Helper function that queries for visit data IDs and returns
            results as a sorted, deduplicated list of visit IDs.
            """
            return sorted(
                {dataId["visit"] for dataId in registry.queryDataIds("visit",
                                                                     instrument="HSC",
                                                                     bind=bind,
                                                                     where=where)}
            )

        # Try a bunch of timespan queries, mixing up the bounds themselves,
        # where they appear in the expression, and how we get the timespan
        # into the expression.

        # t1 is before the start of i1, so this should not include i1.
        self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
        # t2 is exactly at the start of i2, but ends are exclusive, so these
        # should not include i2.
        self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
        self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
        # t3 is in the middle of i3, so this should include i3.
        self.assertEqual(ids[i2:i3 + 1], query("visit.timespan OVERLAPS ts23"))
        # This one should not include i3 by the same reasoning.
        self.assertEqual(ids[i3 + 1:], query("visit.timespan > (t1, t3)"))
        # t4 is exactly at the end of i4, so this should include i4.
        self.assertEqual(ids[i3:i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
        # i4's upper bound of t4 is exclusive, so this should not include i4.
        self.assertEqual(ids[i4 + 1:], query("visit.timespan OVERLAPS (t4, NULL)"))

        # Now some timespan vs. time scalar queries.
        self.assertEqual(ids[:i2], query("visit.timespan < t2"))
        self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
        self.assertEqual(ids[i3 + 1:], query("visit.timespan > t3"))
        self.assertEqual(ids[i3 + 1:], query("t3 < visit.timespan"))
        self.assertEqual(ids[i3:i3 + 1], query("visit.timespan OVERLAPS t3"))
        self.assertEqual(ids[i3:i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))

        # Empty timespans should not overlap anything.
        self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))

    def testCollectionSummaries(self):
        """Test recording and retrieval of collection summaries.
        """
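        # A collection summary records which dataset types and which
        # governor dimension values (here, the instrument) can appear in a
        # collection.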

        self.maxDiff = None
        registry = self.makeRegistry()
        # Importing datasets from yaml should go through the code path where
        # we update collection summaries as we insert datasets.
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        flat = registry.getDatasetType("flat")
        expected1 = CollectionSummary.makeEmpty(registry.dimensions)
        expected1.datasetTypes.add(registry.getDatasetType("bias"))
        expected1.datasetTypes.add(flat)
        expected1.dimensions.update_extract(
            DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)
        )
        self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
        self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
        # Create a chained collection with both of the imported runs; the
        # summary should be the same, because it's a union with itself.
        chain = "chain"
        registry.registerCollection(chain, CollectionType.CHAINED)
        registry.setCollectionChain(chain, ["imported_r", "imported_g"])
        self.assertEqual(registry.getCollectionSummary(chain), expected1)
        # Associate flats only into a tagged collection and a calibration
        # collection to check summaries of those.
        tag = "tag"
        registry.registerCollection(tag, CollectionType.TAGGED)
        registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
        calibs = "calibs"
        registry.registerCollection(calibs, CollectionType.CALIBRATION)
        registry.certify(calibs, registry.queryDatasets(flat, collections="imported_g"),
                         timespan=Timespan(None, None))
        expected2 = expected1.copy()
        expected2.datasetTypes.discard("bias")
        self.assertEqual(registry.getCollectionSummary(tag), expected2)
        self.assertEqual(registry.getCollectionSummary(calibs), expected2)
        # Explicitly calling Registry.refresh() should load those same
        # summaries, via a totally different code path.
        registry.refresh()
        self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
        self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
        self.assertEqual(registry.getCollectionSummary(tag), expected2)
        self.assertEqual(registry.getCollectionSummary(calibs), expected2)

    def testUnrelatedDimensionQueries(self):
        """Test that WHERE expressions in queries can reference dimensions
        that are not in the result set.
        """
        registry = self.makeRegistry()
        # There is no data to back this query, but it should still return
        # zero records instead of raising.
        self.assertFalse(
            set(registry.queryDataIds(["visit", "detector"],
                                      where="instrument='Cam1' AND skymap='not_here' AND tract=0")),
        )