# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
from collections import defaultdict
import itertools
import logging
import os
import re
from typing import Optional, Type, Union
import unittest

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

from ...core import (
    DataCoordinate,
    DataCoordinateSequence,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    ddl,
    Timespan,
)
from .._registry import (
    CollectionSummary,
    CollectionType,
    ConflictingDefinitionError,
    InconsistentDataIdError,
    OrphanedRecordError,
    Registry,
    RegistryConfig,
)
from .._exceptions import MissingCollectionError
from ..interfaces import ButlerAttributeExistsError


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class.  If a subclass provides a
    value for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
        """
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need only the default configuration should just
        instantiate `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers"]["collections"] = self.collectionsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.
        """
        raise NotImplementedError()
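
    # Illustrative sketch (not part of the test suite): a concrete subclass
    # implements the two abstract methods above.  Assuming an in-memory
    # SQLite backend -- the "db" config key and the createFromConfig call
    # are the usual daf_butler wiring, but treat the details as assumptions:
    #
    #     class SQLiteRegistryTests(RegistryTests, unittest.TestCase):
    #
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data")
    #
    #         def makeRegistry(self) -> Registry:
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"  # in-memory database
    #             return Registry.createFromConfig(config)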

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend
        with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)
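
    # For orientation (an illustrative sketch, not a verbatim file): the
    # YAML files under getDataDir() are standard daf_butler export files;
    # the exact schema is owned by the transfers module, but they are
    # roughly of this shape:
    #
    #     description: Butler Data Repository Export
    #     version: 0
    #     data:
    #       - type: dimension
    #         element: instrument
    #         records:
    #           - name: Cam1
    #             ...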

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting an identical dataset type is a no-op that returns
        # False.
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical.
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Register a second dataset type with a different name and storage
        # class.
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})
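
    # Parent dataset types can also be addressed per-component via dotted
    # names; a minimal sketch ("bias"/"wcs" used only for illustration):
    #
    #     DatasetType.nameWithComponent("bias", "wcs")  # -> "bias.wcs"
    #
    # The dotted component form is what testComponentLookups and the
    # component queries below pass to findDataset/queryDatasets.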

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # A missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding the required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )
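
    # Summary of the semantics exercised above (a recap, not new API):
    # insertDimensionData raises on duplicate keys; syncDimensionData
    # returns True when it inserts, False when an identical record already
    # exists, and raises ConflictingDefinitionError when the same primary
    # key carries different values.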

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        # Visit 1 is defined by exposure 1 only, so visit=1 with exposure=2
        # is an inconsistent data ID.
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes()).names
        )
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes(components=False)).names
        )
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names
        )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
        )
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
        )
        # This pattern matches only a component.  In this case we also
        # return that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
        )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")}
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType("temporary", dimensions=["instrument"], storageClass=tempStorageClass,
                                  universe=registry.dimensions)
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should
        # include at least all non-component dataset types (and I don't want
        # to enumerate all of the Exposure components for bias and flat
        # here).
        with self.assertLogs("lsst.daf.butler.registry._registry", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry._registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(parentType, collections=collection,
                                                 instrument="Cam1", detector=1)
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection,
                                         dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            )
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(registry.queryDatasets(
            "bias.wcs",
            collections=collection,
        ))
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2},
            {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but
        # that should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time
        # along with a dataset that isn't in the collection and won't cause
        # a conflict.  Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2].
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [tag1, run2]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1
        # and ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"]
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"]
        )
        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  The chain searches run2 first,
        # so it should be found there directly, and searching chain2 as a
        # whole should return the same dataset.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)
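
    # Recap of the chain structure exercised above (a sketch, not new API):
    #
    #     chain2 -> [run2, chain1]
    #     chain1 -> [tag1, run2]
    #
    # findDataset walks the children in order, recursing into nested
    # CHAINED collections, and returns the first match it encounters.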

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled
        back if an exception propagates out of an inner transaction block
        and is then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)
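
    # The savepoint idiom above generalizes; a minimal sketch of the usage
    # pattern (do_work/do_risky_work are hypothetical placeholders):
    #
    #     with registry.transaction():            # outer transaction
    #         do_work(registry)
    #         try:
    #             with registry.transaction(savepoint=True):
    #                 do_risky_work(registry)     # rolled back on failure
    #         except Exception:
    #             pass                            # outer work survives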

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins
        to skymap."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for the test.
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twenty", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # Dataset types and collections.
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure",
                                                                      "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit",
                                                                         "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # Add pre-existing datasets.
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2;
                # 100 has different datasets in the different collections,
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dimension string works as well as a list of str.
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # With an empty expression.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4*3)   # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # Second collection.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4*3)   # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # With two input collections.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6*3)   # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # Limit to a single visit.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)   # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # A more limiting expression, using link names instead of
        # Table.column.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10 and detector > 1 and 'DummyCam'=instrument").toSet()
        self.assertEqual(len(rows), 2*2)   # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # An expression that excludes everything.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit > 1000", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter: it is not in the requested
        # dimensions, but it is a part of the full expression so it should
        # work too.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="physical_filter = 'dummy_r'", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)   # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for the test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # Dataset types.
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "band")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "band")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # Add pre-existing datasets.
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # With an empty expression.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3*4*2)   # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # Limit to 2 tracts and 2 patches.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="tract IN (1, 5) AND patch IN (2, 7)", skymap="DummyMap").toSet()
        self.assertEqual(len(rows), 2*2*2)   # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # Limit to a single filter.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="band = 'i'").toSet()
        self.assertEqual(len(rows), 3*4*1)   # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # An expression that excludes everything: specifying a non-existing
        # skymap is not a fatal error, just an operator error that matches
        # nothing.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="skymap = 'Mars'").toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just
        # be a reasonable change to the default dimension definitions - but
        # the test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct the expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already
                # fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.byName(), **dataId2.byName()},
                        graph=graph
                    )
                    for (dataId1, region1), (dataId2, region2)
                    in itertools.product(regions[element1.name].items(), regions[element2.name].items())
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, elementRegions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in elementRegions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.byName()},
                            graph=graph
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known bands.
        This is tricky because band is backed by a query against
        physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(band="i", universe=registry.dimensions),
             DataCoordinate.standardize(band="r", universe=registry.dimensions)]
        )

    def testAttributeManager(self):
        """Test basic functionality of the attribute manager.
        """
        # Number of attributes with schema versions in a fresh database:
        # 6 managers with 3 records per manager, plus config for dimensions.
        VERSION_COUNT = 6 * 3 + 1

        registry = self.makeRegistry()
        attributes = registry._managers.attributes

        # Check what get() returns for a non-existing key.
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # Cannot store an empty key or value.
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # Set the value of a non-existing key.
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # Updating the value of an existing key raises unless force=True.
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # Delete an existing key.
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # Delete a non-existing key.
        self.assertFalse(attributes.delete("non-attr"))

        # Store a bunch of keys and get the list back.
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets
        from collections in the order given.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
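
    # Recap of findFirst (a summary of the behavior above, not new API):
    # without it, queryDatasets returns every matching dataset from every
    # collection searched; with findFirst=True, each data ID yields only
    # the dataset from the first collection in the search order that has
    # one, so reversing the collection list changes which duplicates win.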

1143 

1144 def testQueryResults(self): 

1145 """Test querying for data IDs and then manipulating the QueryResults 

1146 object returned to perform other queries. 

1147 """ 

1148 registry = self.makeRegistry() 

1149 self.loadData(registry, "base.yaml") 

1150 self.loadData(registry, "datasets.yaml") 

1151 bias = registry.getDatasetType("bias") 

1152 flat = registry.getDatasetType("flat") 

1153 # Obtain expected results from methods other than those we're testing 

1154 # here. That includes: 

1155 # - the dimensions of the data IDs we want to query: 

1156 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1157 # - the dimensions of some other data IDs we'll extract from that: 

1158 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1159 # - the data IDs we expect to obtain from the first queries: 

1160 expectedDataIds = DataCoordinateSet( 

1161 { 

1162 DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p, 

1163 universe=registry.dimensions) 

1164 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1165 }, 

1166 graph=expectedGraph, 

1167 hasFull=False, 

1168 hasRecords=False, 

1169 ) 

        # - the flat datasets we expect to find from those data IDs, in just
        #   one collection (so deduplication is irrelevant):
        expectedFlats = [
            registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2",
                                 collections="imported_r"),
        ]
        # - the data IDs we expect to extract from that:
        expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
        # - the bias datasets we expect to find from those data IDs, after we
        #   subset out the physical_filter dimension, first with duplicates:
        expectedAllBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # - ...and without duplicates:
        expectedDeduplicatedBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
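        # Note on the deduplicated case: the queries below search
        # ["imported_r", "imported_g"] in that order, so detectors 2 and 3
        # resolve to the "imported_r" datasets, while detector 1 (whose bias
        # exists only in "imported_g") falls back to that collection.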

        # Test against those expected results, using a "lazy" query for the
        # data IDs (which re-executes that query each time we use it to do
        # something new).
        dataIds = registry.queryDataIds(
            ["detector", "physical_filter"],
            where="detector.purpose = 'SCIENCE'",  # this rejects detector=4
            instrument="Cam1",
        )
        self.assertEqual(dataIds.graph, expectedGraph)
        self.assertEqual(dataIds.toSet(), expectedDataIds)
        self.assertCountEqual(
            list(
                dataIds.findDatasets(
                    flat,
                    collections=["imported_r"],
                )
            ),
            expectedFlats,
        )
        subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
        self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
        self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=False
                )
            ),
            expectedAllBiases
        )
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=True
                )
            ), expectedDeduplicatedBiases
        )
        # Materialize the bias dataset queries (only) by putting the results
        # into temporary tables, then repeat those tests.
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=False).materialize() as biases:
            self.assertCountEqual(list(biases), expectedAllBiases)
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=True).materialize() as biases:
            self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the data ID subset query, but not the dataset queries.
        with subsetDataIds.materialize() as subsetDataIds:
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False
                    )
                ),
                expectedAllBiases
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True
                    )
                ), expectedDeduplicatedBiases
            )
            # Materialize the dataset queries, too.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the original query, but none of the follow-up queries.
        with dataIds.materialize() as dataIds:
            self.assertEqual(dataIds.graph, expectedGraph)
            self.assertEqual(dataIds.toSet(), expectedDataIds)
            self.assertCountEqual(
                list(
                    dataIds.findDatasets(
                        flat,
                        collections=["imported_r"],
                    )
                ),
                expectedFlats,
            )
            subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False
                    )
                ),
                expectedAllBiases
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True
                    )
                ), expectedDeduplicatedBiases
            )
            # Materialize just the bias dataset queries.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
            # Materialize the subset data ID query, but not the dataset
            # queries.
            with subsetDataIds.materialize() as subsetDataIds:
                self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
                self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=False
                        )
                    ),
                    expectedAllBiases
                )
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=True
                        )
                    ), expectedDeduplicatedBiases
                )
                # Materialize the bias dataset queries, too, so now we're
                # materializing every single step.
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=False).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedAllBiases)
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=True).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedDeduplicatedBiases)


    def testEmptyDimensionsQueries(self):
        """Test Query and QueryResults objects in the case where there are no
        dimensions.
        """
        # Set up test data: one dataset type, two runs, one dataset in each.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
        registry.registerDatasetType(schema)
        dataId = DataCoordinate.makeEmpty(registry.dimensions)
        run1 = "run1"
        run2 = "run2"
        registry.registerRun(run1)
        registry.registerRun(run2)
        (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
        (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
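        # insertDatasets returns one DatasetRef per data ID, so unpacking the
        # single-element result gives us each ref directly.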

        # Query directly for both datasets at once, and then for each one
        # individually, flipping the collection search order between queries.
        self.assertCountEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2]
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )

        # Query for data IDs with no dimensions.
        dataIds = registry.queryDataIds([])
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty)
        )
        # Use queried data IDs to find the datasets.
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        # Now materialize the data ID query results and repeat those tests.
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then subset that to get the empty one.
        # Repeat the above tests starting from that.
        dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty)
        )
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then materialize, then subset to get
        # the empty one.  Repeat again.
        with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
            dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
            with dataIds.materialize() as dataIds:
                self.assertEqual(
                    dataIds.toSequence(),
                    DataCoordinateSequence([dataId], registry.dimensions.empty)
                )
                self.assertCountEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                    [dataset1, dataset2],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                    [dataset1],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                    [dataset2],
                )


    def testCalibrationCollections(self):
        """Test operations on `~CollectionType.CALIBRATION` collections,
        including `Registry.certify`, `Registry.decertify`, and
        `Registry.findDataset`.
        """
        # Set up: make a Registry and fill it with some datasets in
        # non-calibration collections.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Set up some timestamps.
        t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
        t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
        t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
        t4 = astropy.time.Time('2020-01-01T04:00:00', format="isot", scale="tai")
        t5 = astropy.time.Time('2020-01-01T05:00:00', format="isot", scale="tai")
        allTimespans = [
            Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
        ]
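        # Because None appears at both ends of the input list, the
        # combinations above include Timespan(None, None) (the fully
        # unbounded timespan) as well as every half-bounded one.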

        # Get references to some datasets.
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Register the main calibration collection we'll be working with.
        collection = "Cam1/calibs/default"
        registry.registerCollection(collection, type=CollectionType.CALIBRATION)
        # Cannot associate into a calibration collection, because there is no
        # timespan to certify over.
        with self.assertRaises(TypeError):
            registry.associate(collection, [bias2a])
        # Certify the 2a dataset with [t2, t4) validity.
        registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
        # We should not be able to certify 2b with anything overlapping that
        # window.
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
        # We should be able to certify 3a with a range overlapping that
        # window, because it's for a different detector.
        # We'll certify 3a over [t1, t3).
        registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
        # Now we'll certify 2b and 3b together over [t4, ∞).
        registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))


        # Fetch all associations and check that they are what we expect.
        self.assertCountEqual(
            list(
                registry.queryDatasetAssociations(
                    "bias",
                    collections=[collection, "imported_g", "imported_r"],
                )
            ),
            [
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=1,
                                             collections="imported_g"),
                    collection="imported_g",
                    timespan=None,
                ),
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=4,
                                             collections="imported_r"),
                    collection="imported_r",
                    timespan=None,
                ),
                DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
                DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
                DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
                DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
            ]
        )


        class Ambiguous:
            """Tag class to denote lookups that are expected to be ambiguous.
            """
            pass

        def assertLookup(detector: int, timespan: Timespan,
                         expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None:
            """Local function that asserts that a bias lookup returns the
            given expected result.
            """
            if expected is Ambiguous:
                with self.assertRaises(RuntimeError):
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
            else:
                self.assertEqual(
                    expected,
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
                )

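        # An "Ambiguous" expectation means more than one certified dataset
        # overlaps the given timespan, in which case findDataset is expected
        # to raise RuntimeError rather than pick one arbitrarily.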

        # Systematically test lookups against expected results.
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)


        # Decertify [t3, t5) for all data IDs, and do the test lookups again.
        # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
        # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
        registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)


        # Decertify everything, this time with explicit data IDs, then check
        # that no lookups succeed.
        registry.decertify(
            collection, "bias", Timespan(None, None),
            dataIds=[
                dict(instrument="Cam1", detector=2),
                dict(instrument="Cam1", detector=3),
            ]
        )
        for detector in (2, 3):
            for timespan in allTimespans:
                assertLookup(detector=detector, timespan=timespan, expected=None)
        # Certify bias2a and bias3a over (-∞, ∞), and check that all lookups
        # return those.
        registry.certify(collection, [bias2a, bias3a], Timespan(None, None))
        for timespan in allTimespans:
            assertLookup(detector=2, timespan=timespan, expected=bias2a)
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
        # Decertify just the detector 2 bias over [t2, t4).
        # This should split a single certification row into two (and leave the
        # other existing row, for bias3a, alone).
        registry.decertify(collection, "bias", Timespan(t2, t4),
                           dataIds=[dict(instrument="Cam1", detector=2)])
        for timespan in allTimespans:
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
            overlapsBefore = timespan.overlaps(Timespan(None, t2))
            overlapsAfter = timespan.overlaps(Timespan(t4, None))
            if overlapsBefore and overlapsAfter:
                expected = Ambiguous
            elif overlapsBefore or overlapsAfter:
                expected = bias2a
            else:
                expected = None
            assertLookup(detector=2, timespan=timespan, expected=expected)


    def testIngestTimeQuery(self):
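        """Test that dataset queries can be constrained by ingest_date in
        WHERE expressions.
        """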

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")

        datasets = list(registry.queryDatasets(..., collections=...))
        len0 = len(datasets)
        self.assertGreater(len0, 0)

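        # T'...' is the expression language's literal syntax for a time.  All
        # of the test datasets were ingested just now, so a cut at 2000-01-01
        # should keep everything.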

        where = "ingest_date > T'2000-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len1 = len(datasets)
        self.assertEqual(len0, len1)

        # Nothing should have an ingest date this far in the future (and this
        # software will be long retired before that changes).
        where = "ingest_date > T'2050-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len2 = len(datasets)
        self.assertEqual(len2, 0)


    def testTimespanQueries(self):
        """Test query expressions involving timespans.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")
        # All visits in the database; mapping from ID to timespan.
        visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
        # Just those IDs, sorted (which is also temporal sorting, because HSC
        # visit IDs are monotonically increasing).
        ids = sorted(visits.keys())
        self.assertGreater(len(ids), 20)
        # Pick some quasi-random indexes into `ids` to play with.
        i1 = int(len(ids)*0.1)
        i2 = int(len(ids)*0.3)
        i3 = int(len(ids)*0.6)
        i4 = int(len(ids)*0.8)
        # Extract some times from those: just before the beginning of i1
        # (which should be after the end of the previous visit), exactly the
        # beginning of i2, just after the beginning of i3 (and before its
        # end), and the exact end of i4.
        t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
        self.assertGreater(t1, visits[ids[i1 - 1]].end)
        t2 = visits[ids[i2]].begin
        t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
        self.assertLess(t3, visits[ids[i3]].end)
        t4 = visits[ids[i4]].end
        # Make sure those are actually in order.
        self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))

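        # Bind the times and a pre-built Timespan as named values so that the
        # WHERE expressions below can reference them directly, with no string
        # formatting.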

        bind = {
            "t1": t1,
            "t2": t2,
            "t3": t3,
            "t4": t4,
            "ts23": Timespan(t2, t3),
        }


        def query(where):
            """Helper function that queries for visit data IDs and returns
            results as a sorted, deduplicated list of visit IDs.
            """
            return sorted(
                {dataId["visit"] for dataId in registry.queryDataIds("visit",
                                                                     instrument="HSC",
                                                                     bind=bind,
                                                                     where=where)}
            )


        # Try a bunch of timespan queries, mixing up the bounds themselves,
        # where they appear in the expression, and how we get the timespan
        # into the expression.

        # t1 is before the start of i1, so this should not include i1.
        self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
        # t2 is exactly at the start of i2, but ends are exclusive, so these
        # should not include i2.
        self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
        self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
        # t3 is in the middle of i3, so this should include i3.
        self.assertEqual(ids[i2:i3 + 1], query("visit.timespan OVERLAPS ts23"))
        # This one should not include i3, by the same reasoning.
        self.assertEqual(ids[i3 + 1:], query("visit.timespan > (t1, t3)"))
        # t4 is exactly at the end of i4, so this should include i4.
        self.assertEqual(ids[i3:i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
        # i4's upper bound of t4 is exclusive, so this should not include i4.
        self.assertEqual(ids[i4 + 1:], query("visit.timespan OVERLAPS (t4, NULL)"))

        # Now some timespan vs. time scalar queries.
        self.assertEqual(ids[:i2], query("visit.timespan < t2"))
        self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
        self.assertEqual(ids[i3 + 1:], query("visit.timespan > t3"))
        self.assertEqual(ids[i3 + 1:], query("t3 < visit.timespan"))
        self.assertEqual(ids[i3:i3 + 1], query("visit.timespan OVERLAPS t3"))
        self.assertEqual(ids[i3:i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))

        # Empty timespans should not overlap anything.
        self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))


    def testCollectionSummaries(self):
        """Test recording and retrieval of collection summaries.
        """
        self.maxDiff = None
        registry = self.makeRegistry()
        # Importing datasets from yaml should go through the code path where
        # we update collection summaries as we insert datasets.
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        flat = registry.getDatasetType("flat")
        expected1 = CollectionSummary.makeEmpty(registry.dimensions)
        expected1.datasetTypes.add(registry.getDatasetType("bias"))
        expected1.datasetTypes.add(flat)
        expected1.dimensions.update_extract(
            DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)
        )
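        # Both imported runs hold biases and flats for instrument Cam1 only,
        # so they should share this same summary.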

        self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
        self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
        # Create a chained collection with both of the imported runs; the
        # summary should be the same, because it's a union with itself.
        chain = "chain"
        registry.registerCollection(chain, CollectionType.CHAINED)
        registry.setCollectionChain(chain, ["imported_r", "imported_g"])
        self.assertEqual(registry.getCollectionSummary(chain), expected1)
        # Associate flats only into a tagged collection and a calibration
        # collection, to check the summaries of those.
        tag = "tag"
        registry.registerCollection(tag, CollectionType.TAGGED)
        registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
        calibs = "calibs"
        registry.registerCollection(calibs, CollectionType.CALIBRATION)
        registry.certify(calibs, registry.queryDatasets(flat, collections="imported_g"),
                         timespan=Timespan(None, None))
        expected2 = expected1.copy()
        expected2.datasetTypes.discard("bias")
        self.assertEqual(registry.getCollectionSummary(tag), expected2)
        self.assertEqual(registry.getCollectionSummary(calibs), expected2)
        # Explicitly calling Registry.refresh() should load those same
        # summaries, via a totally different code path.
        registry.refresh()
        self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
        self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
        self.assertEqual(registry.getCollectionSummary(tag), expected2)
        self.assertEqual(registry.getCollectionSummary(calibs), expected2)


    def testUnrelatedDimensionQueries(self):
        """Test that WHERE expressions in queries can reference dimensions
        that are not in the result set.
        """
        registry = self.makeRegistry()
        # There is no data to back this query, but it should still return
        # zero records instead of raising.
        self.assertFalse(
            set(registry.queryDataIds(["visit", "detector"],
                                      where="instrument='Cam1' AND skymap='not_here' AND tract=0")),
        )