# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
from collections import defaultdict
import itertools
import logging
import os
import re
import unittest
from typing import Optional, Type, Union, TYPE_CHECKING

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

from ...core import (
    DataCoordinate,
    DataCoordinateSequence,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    ddl,
    Timespan,
)
from ..summaries import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ConflictingDefinitionError,
    InconsistentDataIdError,
    MissingCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
        """
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need only the default configuration should just
        instantiate `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers"]["collections"] = self.collectionsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.
        """
        raise NotImplementedError()
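
    # A minimal concrete subclass might look like the sketch below.  The
    # class name, data directory, SQLite URI, and the
    # ``Registry.createFromConfig`` call are illustrative assumptions, not
    # part of this module:
    #
    #     class SqliteRegistryTests(RegistryTests, unittest.TestCase):
    #
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data", "registry")
    #
    #         def makeRegistry(self) -> Registry:
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"  # private in-memory database
    #             return Registry.createFromConfig(config)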

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend
        with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)
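
    # Typical usage from the test methods below (``base.yaml`` provides
    # dimension records and dataset types; ``datasets.yaml`` adds datasets in
    # the "imported_g" and "imported_r" runs):
    #
    #     registry = self.makeRegistry()
    #     self.loadData(registry, "base.yaml")
    #     self.loadData(registry, "datasets.yaml")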

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting the same dataset type should be a no-op that returns
        # False...
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # ...except when the definitions are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes()).names
        )
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes(components=False)).names
        )
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names
        )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
        )
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
        )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
        )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")}
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType("temporary", dimensions=["instrument"], storageClass=tempStorageClass,
                                  universe=registry.dimensions)
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registry._sqlRegistry", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry._sqlRegistry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
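
    # Summary of the ``components`` argument behavior exercised above:
    #   components=False -> parent dataset types only
    #   components=True  -> parent dataset types plus their components
    #   components=None  -> parents; a component is only returned when the
    #                       pattern can match nothing but that component
    #                       (e.g. re.compile(r"^bias\.wcs"))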

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(parentType, collections=collection,
                                                 instrument="Cam1", detector=1)
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection,
                                         dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            )
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(registry.queryDatasets(
            "bias.wcs",
            collections=collection,
        ))
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2},
            {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time along
        # with a dataset that isn't in the collection and won't cause a
        # conflict.  Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [tag1, run2]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"]
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"]
        )
        # Search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  That should be found at the
        # front of chain2 (via run2 there), and it should also be found at
        # the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)
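
    # The savepoint pattern exercised above, as client code might use it (a
    # sketch only, assuming just the ``Registry.transaction`` API shown in
    # this test):
    #
    #     with registry.transaction():                   # outer transaction
    #         registry.insertDimensionData(...)          # kept on commit
    #         try:
    #             with registry.transaction(savepoint=True):
    #                 registry.insertDimensionData(...)  # rolled back on error
    #         except sqlalchemy.exc.IntegrityError:
    #             pass  # the outer transaction can still commit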

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure",
                                                                      "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit",
                                                                         "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2;
                # 100 has different datasets in the different collections,
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dimension string works as well as a list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4*3)   # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4*3)   # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6*3)   # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to a single visit
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)   # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10 and detector > 1 and 'DummyCam'=instrument").toSet()
        self.assertEqual(len(rows), 2*2)   # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # expression excludes everything
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit > 1000", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, which is not in the dimensions, but is
        # a part of the full expression, so it should work too.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="physical_filter = 'dummy_r'", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)   # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test; we want
        # "band" in the test so we also have to add physical_filter
        # dimensions
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "band")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "band")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3*4*2)   # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="tract IN (1, 5) AND patch IN (2, 7)", skymap="DummyMap").toSet()
        self.assertEqual(len(rows), 2*2*2)   # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to a single filter
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="band = 'i'").toSet()
        self.assertEqual(len(rows), 3*4*1)   # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # The expression excludes everything; specifying a non-existing skymap
        # is not a fatal error, it's an operator error.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="skymap = 'Mars'").toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct the expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.byName(), **dataId2.byName()},
                        graph=graph
                    )
                    for (dataId1, region1), (dataId2, region2)
                    in itertools.product(regions[element1.name].items(), regions[element2.name].items())
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, elementRegions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in elementRegions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.byName()},
                            graph=graph
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known
        bands.  This is tricky because band is
        backed by a query against physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(band="i", universe=registry.dimensions),
             DataCoordinate.standardize(band="r", universe=registry.dimensions)]
        )

    def testAttributeManager(self):
        """Test basic functionality of the attribute manager.
        """
        # Number of attributes with schema versions in a fresh database:
        # 6 managers with 3 records per manager, plus config for dimensions.
        VERSION_COUNT = 6 * 3 + 1

        registry = self.makeRegistry()
        attributes = registry._managers.attributes

        # check what get() returns for a non-existing key
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # cannot store an empty key or value
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # set value of a non-existing key
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # updating the value of an existing key requires force=True
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # delete an existing key
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # delete a non-existing key
        self.assertFalse(attributes.delete("non-attr"))

        # store a bunch of keys and get the list back
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets
        from collections in the order given.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
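
    # Note the ordering dependence above: with findFirst=True each data ID
    # yields only the dataset from the first collection in the search path
    # that has one, so reversing ["imported_g", "imported_r"] changes which
    # refs are returned for detectors 2 and 3 (present in both runs), while
    # detector 1 (only in imported_g) and detector 4 (only in imported_r) are
    # unaffected.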

    def testQueryResults(self):
        """Test querying for data IDs and then manipulating the QueryResults
        object returned to perform other queries.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        bias = registry.getDatasetType("bias")
        flat = registry.getDatasetType("flat")
        # Obtain expected results from methods other than those we're testing
        # here.  That includes:
        # - the dimensions of the data IDs we want to query:
        expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
        # - the dimensions of some other data IDs we'll extract from that:
        expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
        # - the data IDs we expect to obtain from the first queries:
        expectedDataIds = DataCoordinateSet(
            {
                DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p,
                                           universe=registry.dimensions)
                for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
            },
            graph=expectedGraph,
            hasFull=False,
            hasRecords=False,
        )
        # - the flat datasets we expect to find from those data IDs, in just
        #   one collection (so deduplication is irrelevant):
        expectedFlats = [
            registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2",
                                 collections="imported_r"),
        ]
        # - the data IDs we expect to extract from that:
        expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
        # - the bias datasets we expect to find from those data IDs, after we
        #   subset-out the physical_filter dimension, both with duplicates:
        expectedAllBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # - ...and without duplicates:
        expectedDeduplicatedBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # Test against those expected results, using a "lazy" query for the
        # data IDs (which re-executes that query each time we use it to do
        # something new).
        dataIds = registry.queryDataIds(
            ["detector", "physical_filter"],
            where="detector.purpose = 'SCIENCE'",  # this rejects detector=4
            instrument="Cam1",
        )
        self.assertEqual(dataIds.graph, expectedGraph)
        self.assertEqual(dataIds.toSet(), expectedDataIds)
        self.assertCountEqual(
            list(
                dataIds.findDatasets(
                    flat,
                    collections=["imported_r"],
                )
            ),
            expectedFlats,
        )
        subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
        self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
        self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=False
                )
            ),
            expectedAllBiases
        )
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=True
                )
            ), expectedDeduplicatedBiases
        )
        # Materialize the bias dataset queries (only) by putting the results
        # into temporary tables, then repeat those tests.
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=False).materialize() as biases:
            self.assertCountEqual(list(biases), expectedAllBiases)
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=True).materialize() as biases:
            self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the data ID subset query, but not the dataset queries.
        with subsetDataIds.materialize() as subsetDataIds:
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False
                    )
                ),
                expectedAllBiases
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True
                    )
                ), expectedDeduplicatedBiases
            )
            # Materialize the dataset queries, too.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the original query, but none of the follow-up queries.
        with dataIds.materialize() as dataIds:
            self.assertEqual(dataIds.graph, expectedGraph)
            self.assertEqual(dataIds.toSet(), expectedDataIds)

1283 self.assertCountEqual( 

1284 list( 

1285 dataIds.findDatasets( 

1286 flat, 

1287 collections=["imported_r"], 

1288 ) 

1289 ), 

1290 expectedFlats, 

1291 ) 

1292 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1293 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1294 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1295 self.assertCountEqual( 

1296 list( 

1297 subsetDataIds.findDatasets( 

1298 bias, 

1299 collections=["imported_r", "imported_g"], 

1300 findFirst=False 

1301 ) 

1302 ), 

1303 expectedAllBiases 

1304 ) 

1305 self.assertCountEqual( 

1306 list( 

1307 subsetDataIds.findDatasets( 

1308 bias, 

1309 collections=["imported_r", "imported_g"], 

1310 findFirst=True 

1311 ) 

1312 ), expectedDeduplicatedBiases 

1313 ) 

1314 # Materialize just the bias dataset queries. 

1315 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1316 findFirst=False).materialize() as biases: 

1317 self.assertCountEqual(list(biases), expectedAllBiases) 

1318 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1319 findFirst=True).materialize() as biases: 

1320 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1321 # Materialize the subset data ID query, but not the dataset 

1322 # queries. 

1323 with subsetDataIds.materialize() as subsetDataIds: 

1324 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1325 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1326 self.assertCountEqual( 

1327 list( 

1328 subsetDataIds.findDatasets( 

1329 bias, 

1330 collections=["imported_r", "imported_g"], 

1331 findFirst=False 

1332 ) 

1333 ), 

1334 expectedAllBiases 

1335 ) 

1336 self.assertCountEqual( 

1337 list( 

1338 subsetDataIds.findDatasets( 

1339 bias, 

1340 collections=["imported_r", "imported_g"], 

1341 findFirst=True 

1342 ) 

1343 ), expectedDeduplicatedBiases 

1344 ) 

1345 # Materialize the bias dataset queries, too, so now we're 

1346 # materializing every single step. 

1347 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1348 findFirst=False).materialize() as biases: 

1349 self.assertCountEqual(list(biases), expectedAllBiases) 

1350 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1351 findFirst=True).materialize() as biases: 

1352 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1353 
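# --- Added illustration (not in the original suite): a sketch of the
# deduplication behaviour of QueryResults.subset(..., unique=True) used
# above.  The nine (detector, physical_filter) data IDs from the query in
# testQueryResults collapse to three detector-only IDs; the counts assume
# the same base.yaml/datasets.yaml fixtures.
def testSubsetUniqueSketch(self):
    registry = self.makeRegistry()
    self.loadData(registry, "base.yaml")
    self.loadData(registry, "datasets.yaml")
    dataIds = registry.queryDataIds(
        ["detector", "physical_filter"],
        where="detector.purpose = 'SCIENCE'",  # rejects detector=4
        instrument="Cam1",
    )
    self.assertEqual(len(dataIds.toSet()), 9)  # 3 detectors x 3 filters
    subsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
    subset = dataIds.subset(subsetGraph, unique=True)
    self.assertEqual(len(subset.toSet()), 3)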

1354 def testEmptyDimensionsQueries(self): 

1355 """Test Query and QueryResults objects in the case where there are no 

1356 dimensions. 

1357 """ 

1358 # Set up test data: one dataset type, two runs, one dataset in each. 

1359 registry = self.makeRegistry() 

1360 self.loadData(registry, "base.yaml") 

1361 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1362 registry.registerDatasetType(schema) 

1363 dataId = DataCoordinate.makeEmpty(registry.dimensions) 

1364 run1 = "run1" 

1365 run2 = "run2" 

1366 registry.registerRun(run1) 

1367 registry.registerRun(run2) 

1368 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1369 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1370 # Query directly for both datasets at once, then for each one via find-first. 

1371 self.assertCountEqual( 

1372 list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=False)), 

1373 [dataset1, dataset2] 

1374 ) 

1375 self.assertEqual( 

1376 list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=True)), 

1377 [dataset1], 

1378 ) 

1379 self.assertEqual( 

1380 list(registry.queryDatasets(schema, collections=[run2, run1], findFirst=True)), 

1381 [dataset2], 

1382 ) 

1383 # Query for data IDs with no dimensions. 

1384 dataIds = registry.queryDataIds([]) 

1385 self.assertEqual( 

1386 dataIds.toSequence(), 

1387 DataCoordinateSequence([dataId], registry.dimensions.empty) 

1388 ) 

1389 # Use queried data IDs to find the datasets. 

1390 self.assertCountEqual( 

1391 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)), 

1392 [dataset1, dataset2], 

1393 ) 

1394 self.assertEqual( 

1395 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)), 

1396 [dataset1], 

1397 ) 

1398 self.assertEqual( 

1399 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)), 

1400 [dataset2], 

1401 ) 

1402 # Now materialize the data ID query results and repeat those tests. 

1403 with dataIds.materialize() as dataIds: 

1404 self.assertEqual( 

1405 dataIds.toSequence(), 

1406 DataCoordinateSequence([dataId], registry.dimensions.empty) 

1407 ) 

1408 self.assertCountEqual( 

1409 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)), 

1410 [dataset1, dataset2], 

1411 ) 

1412 self.assertEqual( 

1413 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)), 

1414 [dataset1], 

1415 ) 

1416 self.assertEqual( 

1417 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)), 

1418 [dataset2], 

1419 ) 

1420 # Query for non-empty data IDs, then subset that to get the empty one. 

1421 # Repeat the above tests starting from that. 

1422 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1423 self.assertEqual( 

1424 dataIds.toSequence(), 

1425 DataCoordinateSequence([dataId], registry.dimensions.empty) 

1426 ) 

1427 self.assertCountEqual( 

1428 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)), 

1429 [dataset1, dataset2], 

1430 ) 

1431 self.assertEqual( 

1432 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)), 

1433 [dataset1], 

1434 ) 

1435 self.assertEqual( 

1436 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)), 

1437 [dataset2], 

1438 ) 

1439 with dataIds.materialize() as dataIds: 

1440 self.assertEqual( 

1441 dataIds.toSequence(), 

1442 DataCoordinateSequence([dataId], registry.dimensions.empty) 

1443 ) 

1444 self.assertCountEqual( 

1445 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)), 

1446 [dataset1, dataset2], 

1447 ) 

1448 self.assertEqual( 

1449 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)), 

1450 [dataset1], 

1451 ) 

1452 self.assertEqual( 

1453 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)), 

1454 [dataset2], 

1455 ) 

1456 # Query for non-empty data IDs, then materialize, then subset to get 

1457 # the empty one. Repeat again. 

1458 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1459 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1460 self.assertEqual( 

1461 dataIds.toSequence(), 

1462 DataCoordinateSequence([dataId], registry.dimensions.empty) 

1463 ) 

1464 self.assertCountEqual( 

1465 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)), 

1466 [dataset1, dataset2], 

1467 ) 

1468 self.assertEqual( 

1469 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)), 

1470 [dataset1], 

1471 ) 

1472 self.assertEqual( 

1473 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)), 

1474 [dataset2], 

1475 ) 

1476 with dataIds.materialize() as dataIds: 

1477 self.assertEqual( 

1478 dataIds.toSequence(), 

1479 DataCoordinateSequence([dataId], registry.dimensions.empty) 

1480 ) 

1481 self.assertCountEqual( 

1482 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)), 

1483 [dataset1, dataset2], 

1484 ) 

1485 self.assertEqual( 

1486 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)), 

1487 [dataset1], 

1488 ) 

1489 self.assertEqual( 

1490 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)), 

1491 [dataset2], 

1492 ) 

1493 
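# --- Added illustration (not in the original suite): Registry.findDataset
# applies the same find-first logic as the queries above, so with an empty
# data ID the run listed first wins.  A sketch, assuming findDataset accepts
# a list of collections as an ordered search path (consistent with its use
# as a single-collection lookup elsewhere in this suite).
def testEmptyDataIdFindDatasetSketch(self):
    registry = self.makeRegistry()
    self.loadData(registry, "base.yaml")
    schema = DatasetType("schema", dimensions=registry.dimensions.empty,
                         storageClass="Catalog")
    registry.registerDatasetType(schema)
    dataId = DataCoordinate.makeEmpty(registry.dimensions)
    registry.registerRun("run1")
    registry.registerRun("run2")
    (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run="run1")
    (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run="run2")
    self.assertEqual(
        registry.findDataset(schema, dataId, collections=["run1", "run2"]),
        dataset1,
    )
    self.assertEqual(
        registry.findDataset(schema, dataId, collections=["run2", "run1"]),
        dataset2,
    )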

1494 def testCalibrationCollections(self): 

1495 """Test operations on `~CollectionType.CALIBRATION` collections, 

1496 including `Registry.certify`, `Registry.decertify`, and 

1497 `Registry.findDataset`. 

1498 """ 

1499 # Set up: make a Registry and fill it with some datasets in 

1500 # non-calibration collections. 

1501 registry = self.makeRegistry() 

1502 self.loadData(registry, "base.yaml") 

1503 self.loadData(registry, "datasets.yaml") 

1504 # Set up some timestamps. 

1505 t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai") 

1506 t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai") 

1507 t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai") 

1508 t4 = astropy.time.Time('2020-01-01T04:00:00', format="isot", scale="tai") 

1509 t5 = astropy.time.Time('2020-01-01T05:00:00', format="isot", scale="tai") 

1510 allTimespans = [ 

1511 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

1512 ] 
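# (None appears at both ends of the input list so that the pairwise
# combinations include timespans unbounded below, unbounded above, and
# fully unbounded.)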

1513 # Get references to some datasets. 

1514 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

1515 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

1516 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

1517 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

1518 # Register the main calibration collection we'll be working with. 

1519 collection = "Cam1/calibs/default" 

1520 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

1521 # Cannot associate into a calibration collection (no timespan). 

1522 with self.assertRaises(TypeError): 

1523 registry.associate(collection, [bias2a]) 

1524 # Certify the 2a dataset with [t2, t4) validity. 

1525 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

1526 # We should not be able to certify 2b with anything overlapping that 

1527 # window. 

1528 with self.assertRaises(ConflictingDefinitionError): 

1529 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

1530 with self.assertRaises(ConflictingDefinitionError): 

1531 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

1532 with self.assertRaises(ConflictingDefinitionError): 

1533 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

1534 with self.assertRaises(ConflictingDefinitionError): 

1535 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

1536 with self.assertRaises(ConflictingDefinitionError): 

1537 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

1538 with self.assertRaises(ConflictingDefinitionError): 

1539 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

1540 with self.assertRaises(ConflictingDefinitionError): 

1541 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

1542 with self.assertRaises(ConflictingDefinitionError): 

1543 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

1544 # We should be able to certify 3a with a range overlapping that window, 

1545 # because it's for a different detector. 

1546 # We'll certify 3a over [t1, t3). 

1547 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

1548 # Now we'll certify 2b and 3b together over [t4, ∞). 

1549 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

1550 

1551 # Fetch all associations and check that they are what we expect. 

1552 self.assertCountEqual( 

1553 list( 

1554 registry.queryDatasetAssociations( 

1555 "bias", 

1556 collections=[collection, "imported_g", "imported_r"], 

1557 ) 

1558 ), 

1559 [ 

1560 DatasetAssociation( 

1561 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1562 collection="imported_g", 

1563 timespan=None, 

1564 ), 

1565 DatasetAssociation( 

1566 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1567 collection="imported_r", 

1568 timespan=None, 

1569 ), 

1570 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

1571 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

1572 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

1573 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

1574 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

1575 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

1576 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

1577 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

1578 ] 

1579 ) 
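# (Added illustration, not in the original test): for a CALIBRATION
# collection the association carries the certification timespan, so the
# [t2, t4) window certified above for bias2a is visible directly when we
# search only that collection.
calibAssociations = [
    assoc for assoc in registry.queryDatasetAssociations(
        "bias", collections=[collection])
    if assoc.ref == bias2a
]
self.assertEqual(len(calibAssociations), 1)
self.assertEqual(calibAssociations[0].timespan, Timespan(begin=t2, end=t4))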

1580 

1581 class Ambiguous: 

1582 """Tag class to denote lookups that are expected to be ambiguous. 

1583 """ 

1584 pass 

1585 

1586 def assertLookup(detector: int, timespan: Timespan, 

1587 expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None: 

1588 """Local function that asserts that a bias lookup returns the given 

1589 expected result. 

1590 """ 

1591 if expected is Ambiguous: 

1592 with self.assertRaises(RuntimeError): 

1593 registry.findDataset("bias", collections=collection, instrument="Cam1", 

1594 detector=detector, timespan=timespan) 

1595 else: 

1596 self.assertEqual( 

1597 expected, 

1598 registry.findDataset("bias", collections=collection, instrument="Cam1", 

1599 detector=detector, timespan=timespan) 

1600 ) 

1601 

1602 # Systematically test lookups against expected results. 

1603 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

1604 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

1605 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

1606 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

1607 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

1608 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

1609 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

1610 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

1611 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

1612 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

1613 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

1614 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

1615 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

1616 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

1617 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

1618 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

1619 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

1620 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

1621 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

1622 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

1623 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

1624 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

1625 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

1626 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

1627 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

1628 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

1629 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

1630 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

1631 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

1632 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

1633 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

1634 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

1635 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

1636 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

1637 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

1638 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

1639 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

1640 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

1641 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

1642 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

1643 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

1644 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

1645 

1646 # Decertify [t3, t5) for all data IDs, and run the test lookups again. 

1647 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

1648 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

1649 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

1650 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

1651 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

1652 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

1653 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

1654 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

1655 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

1656 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

1657 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

1658 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

1659 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

1660 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

1661 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

1662 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

1663 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

1664 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

1665 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

1666 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

1667 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

1668 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

1669 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

1670 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

1671 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

1672 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

1673 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

1674 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

1675 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

1676 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

1677 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

1678 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

1679 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

1680 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

1681 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

1682 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

1683 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

1684 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

1685 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

1686 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

1687 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

1688 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

1689 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

1690 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

1691 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

1692 
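# (Added illustration, not in the original test): the truncation described
# above can also be seen directly in the remaining association timespans.
remaining = [
    (assoc.ref, assoc.timespan)
    for assoc in registry.queryDatasetAssociations("bias", collections=[collection])
]
self.assertIn((bias2a, Timespan(begin=t2, end=t3)), remaining)
self.assertIn((bias3a, Timespan(begin=t1, end=t3)), remaining)
self.assertIn((bias2b, Timespan(begin=t5, end=None)), remaining)
self.assertIn((bias3b, Timespan(begin=t5, end=None)), remaining)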

1693 # Decertify everything, this time with explicit data IDs, then check 

1694 # that no lookups succeed. 

1695 registry.decertify( 

1696 collection, "bias", Timespan(None, None), 

1697 dataIds=[ 

1698 dict(instrument="Cam1", detector=2), 

1699 dict(instrument="Cam1", detector=3), 

1700 ] 

1701 ) 

1702 for detector in (2, 3): 

1703 for timespan in allTimespans: 

1704 assertLookup(detector=detector, timespan=timespan, expected=None) 

1705 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

1706 # those. 

1707 registry.certify(collection, [bias2a, bias3a], Timespan(None, None)) 

1708 for timespan in allTimespans: 

1709 assertLookup(detector=2, timespan=timespan, expected=bias2a) 

1710 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

1711 # Decertify just bias2 over [t2, t4). 

1712 # This should split a single certification row into two (and leave the 

1713 # other existing row, for bias3a, alone). 

1714 registry.decertify(collection, "bias", Timespan(t2, t4), 

1715 dataIds=[dict(instrument="Cam1", detector=2)]) 

1716 for timespan in allTimespans: 

1717 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

1718 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

1719 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

1720 if overlapsBefore and overlapsAfter: 

1721 expected = Ambiguous 

1722 elif overlapsBefore or overlapsAfter: 

1723 expected = bias2a 

1724 else: 

1725 expected = None 

1726 assertLookup(detector=2, timespan=timespan, expected=expected) 

1727 
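# --- Added illustration (not in the original suite): the lookup semantics
# exercised above all follow from Timespan's half-open [begin, end)
# convention; a minimal sketch of that convention:
def testTimespanHalfOpenSketch(self):
    t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
    t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
    t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
    # Adjacent timespans share an endpoint but do not overlap.
    self.assertFalse(Timespan(t1, t2).overlaps(Timespan(t2, t3)))
    self.assertTrue(Timespan(t1, t3).overlaps(Timespan(t2, t3)))
    # A fully unbounded timespan overlaps everything non-empty.
    self.assertTrue(Timespan(None, None).overlaps(Timespan(t1, t2)))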

1728 def testIngestTimeQuery(self): 

1729 """Test WHERE expressions involving the dataset ingest_date."""

1730 registry = self.makeRegistry() 

1731 self.loadData(registry, "base.yaml") 

1732 self.loadData(registry, "datasets.yaml") 

1733 

1734 datasets = list(registry.queryDatasets(..., collections=...)) 

1735 len0 = len(datasets) 

1736 self.assertGreater(len0, 0) 

1737 
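# Every dataset's ingest_date is set at test time, so a lower bound in
# the year 2000 should match everything.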

1738 where = "ingest_date > T'2000-01-01'" 

1739 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

1740 len1 = len(datasets) 

1741 self.assertEqual(len0, len1) 

1742 

1743 # No dataset can have been ingested after this far-future date (no one 
# will still be running this test in 2050). 

1744 where = "ingest_date > T'2050-01-01'" 

1745 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

1746 len2 = len(datasets) 

1747 self.assertEqual(len2, 0) 

1748 
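# --- Added illustration (not in the original suite): `...` is the wildcard
# used above for both dataset types and collections; narrowing the dataset
# type can only shrink the result set.  A sketch:
def testWildcardNarrowingSketch(self):
    registry = self.makeRegistry()
    self.loadData(registry, "base.yaml")
    self.loadData(registry, "datasets.yaml")
    everything = set(registry.queryDatasets(..., collections=...))
    biases = set(registry.queryDatasets("bias", collections=...))
    self.assertTrue(biases)
    self.assertTrue(biases.issubset(everything))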

1749 def testTimespanQueries(self): 

1750 """Test query expressions involving timespans. 

1751 """ 

1752 registry = self.makeRegistry() 

1753 self.loadData(registry, "hsc-rc2-subset.yaml") 

1754 # All visits in the database; mapping from ID to timespan. 

1755 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

1756 # Just those IDs, sorted (which is also temporal sorting, because HSC 

1757 # visit IDs are monotonically increasing). 

1758 ids = sorted(visits.keys()) 

1759 self.assertGreater(len(ids), 20) 

1760 # Pick some quasi-random indexes into `ids` to play with. 

1761 i1 = int(len(ids)*0.1) 

1762 i2 = int(len(ids)*0.3) 

1763 i3 = int(len(ids)*0.6) 

1764 i4 = int(len(ids)*0.8) 

1765 # Extract some times from those: just before the beginning of i1 (which 

1766 # should be after the end of the previous visit), exactly the 

1767 # beginning of i2, just after the beginning of i3 (and before its end), 

1768 # and the exact end of i4. 

1769 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

1770 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

1771 t2 = visits[ids[i2]].begin 

1772 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

1773 self.assertLess(t3, visits[ids[i3]].end) 

1774 t4 = visits[ids[i4]].end 

1775 # Make sure those are actually in order. 

1776 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

1777 

1778 bind = { 

1779 "t1": t1, 

1780 "t2": t2, 

1781 "t3": t3, 

1782 "t4": t4, 

1783 "ts23": Timespan(t2, t3), 

1784 } 

1785 

1786 def query(where): 

1787 """Helper function that queries for visit data IDs and returns 

1788 results as a sorted, deduplicated list of visit IDs. 

1789 """ 

1790 return sorted( 

1791 {dataId["visit"] for dataId in registry.queryDataIds("visit", 

1792 instrument="HSC", 

1793 bind=bind, 

1794 where=where)} 

1795 ) 

1796 

1797 # Try a bunch of timespan queries, mixing up the bounds themselves, 

1798 # where they appear in the expression, and how we get the timespan into 

1799 # the expression. 

1800 

1801 # t1 is before the start of i1, so this should not include i1. 

1802 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

1803 # t2 is exactly at the start of i2, but ends are exclusive, so these 

1804 # should not include i2. 

1805 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

1806 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

1807 # t3 is in the middle of i3, so this should include i3. 

1808 self.assertEqual(ids[i2:i3 + 1], query("visit.timespan OVERLAPS ts23")) 

1809 # This one should not include i3, by the same reasoning. 

1810 self.assertEqual(ids[i3 + 1:], query("visit.timespan > (t1, t3)")) 

1811 # t4 is exactly at the end of i4, so this should include i4. 

1812 self.assertEqual(ids[i3:i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

1813 # i4's upper bound of t4 is exclusive, so this should not include i4. 

1814 self.assertEqual(ids[i4 + 1:], query("visit.timespan OVERLAPS (t4, NULL)")) 

1815 

1816 # Now some timespan vs. time scalar queries. 

1817 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

1818 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

1819 self.assertEqual(ids[i3 + 1:], query("visit.timespan > t3")) 

1820 self.assertEqual(ids[i3 + 1:], query("t3 < visit.timespan")) 

1821 self.assertEqual(ids[i3:i3 + 1], query("visit.timespan OVERLAPS t3")) 

1822 self.assertEqual(ids[i3:i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

1823 

1824 # Empty timespans should not overlap anything. 

1825 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 

1826 
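# --- Added illustration (not in the original suite): a bound timespan
# value and an equivalent T'...' literal should select the same visits.
# A sketch, picking a time one second into the earliest visit (the same
# isot round-trip used above for t3):
def testBindVersusLiteralSketch(self):
    registry = self.makeRegistry()
    self.loadData(registry, "hsc-rc2-subset.yaml")
    visits = {record.id: record.timespan
              for record in registry.queryDimensionRecords("visit")}
    t = visits[min(visits)].begin + astropy.time.TimeDelta(1.0, format="sec")
    withBind = {dataId["visit"] for dataId in registry.queryDataIds(
        "visit", instrument="HSC", bind={"t": t},
        where="visit.timespan OVERLAPS t")}
    withLiteral = {dataId["visit"] for dataId in registry.queryDataIds(
        "visit", instrument="HSC",
        where=f"visit.timespan OVERLAPS T'{t.tai.isot}'")}
    self.assertEqual(withBind, withLiteral)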

1827 def testCollectionSummaries(self): 

1828 """Test recording and retrieval of collection summaries. 

1829 """ 

1830 self.maxDiff = None 

1831 registry = self.makeRegistry() 

1832 # Importing datasets from YAML should go through the code path where 

1833 # we update collection summaries as we insert datasets. 

1834 self.loadData(registry, "base.yaml") 

1835 self.loadData(registry, "datasets.yaml") 

1836 flat = registry.getDatasetType("flat") 

1837 expected1 = CollectionSummary.makeEmpty(registry.dimensions) 

1838 expected1.datasetTypes.add(registry.getDatasetType("bias")) 

1839 expected1.datasetTypes.add(flat) 

1840 expected1.dimensions.update_extract( 

1841 DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions) 

1842 ) 

1843 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

1844 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

1845 # Create a chained collection with both of the imported runs; the 

1846 # summary should be the same, because it is the union of two identical summaries. 

1847 chain = "chain" 

1848 registry.registerCollection(chain, CollectionType.CHAINED) 

1849 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

1850 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

1851 # Associate flats only into a tagged collection and a calibration 

1852 # collection to check summaries of those. 

1853 tag = "tag" 

1854 registry.registerCollection(tag, CollectionType.TAGGED) 

1855 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

1856 calibs = "calibs" 

1857 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

1858 registry.certify(calibs, registry.queryDatasets(flat, collections="imported_g"), 

1859 timespan=Timespan(None, None)) 

1860 expected2 = expected1.copy() 

1861 expected2.datasetTypes.discard("bias") 

1862 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

1863 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

1864 # Explicitly calling Registry.refresh() should load those same 

1865 # summaries, via a totally different code path. 

1866 registry.refresh() 

1867 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

1868 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

1869 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

1870 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

1871 
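# --- Added illustration (not in the original suite): a sketch of the
# boundary case, assuming a freshly-registered run with no datasets
# reports an empty summary.
def testEmptyCollectionSummarySketch(self):
    registry = self.makeRegistry()
    run = "empty_run"
    registry.registerRun(run)
    self.assertEqual(
        registry.getCollectionSummary(run),
        CollectionSummary.makeEmpty(registry.dimensions),
    )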

1872 def testUnrelatedDimensionQueries(self): 

1873 """Test that WHERE expressions in queries can reference dimensions that 

1874 are not in the result set. 

1875 """ 

1876 registry = self.makeRegistry() 

1877 # There is no data to back this query, but it should still return 

1878 # zero records instead of raising. 

1879 self.assertFalse( 

1880 set(registry.queryDataIds(["visit", "detector"], 

1881 where="instrument='Cam1' AND skymap='not_here' AND tract=0")), 

1882 )
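# --- Added illustration (not in the original suite): the same mechanism
# with backing data; the WHERE clause constrains by physical_filter even
# though only detector is requested.  Assumes the Cam1-G filter defined in
# base.yaml (as used throughout this suite).
def testUnrelatedDimensionWithDataSketch(self):
    registry = self.makeRegistry()
    self.loadData(registry, "base.yaml")
    dataIds = set(registry.queryDataIds(
        ["detector"],
        where="instrument='Cam1' AND physical_filter='Cam1-G'",
    ))
    self.assertTrue(dataIds)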