# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
from collections import defaultdict
import itertools
import logging
import os
import re
import unittest

import astropy.time
import sqlalchemy
from typing import Optional, Type, Union

try:
    import numpy as np
except ImportError:
    np = None

from ...core import (
    DataCoordinate,
    DataCoordinateSequence,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    ddl,
    Timespan,
)
from .._registry import (
    CollectionSummary,
    CollectionType,
    ConflictingDefinitionError,
    InconsistentDataIdError,
    Registry,
    RegistryConfig,
)
from .._exceptions import MissingCollectionError
from ..interfaces import ButlerAttributeExistsError


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """
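
    # A subclass might, for example, point this member at one of the manager
    # classes shipped with daf_butler (the exact class path below is an
    # assumption for illustration, not a requirement of these tests):
    #
    #     collectionsManager = \
    #         "lsst.daf.butler.registry.collections.synthIntKey.SynthIntKeyCollectionManager"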


    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
        """
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need only the default configuration can just
        instantiate `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers"]["collections"] = self.collectionsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.
        """
        raise NotImplementedError()
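
    # A minimal sketch of a concrete subclass, for illustration only.  The
    # in-memory SQLite URI and the `Registry.createFromConfig` entry point
    # are assumptions about the surrounding package, not part of this ABC's
    # contract:
    #
    #     class SQLiteRegistryTests(RegistryTests, unittest.TestCase):
    #
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data", "registry")
    #
    #         def makeRegistry(self) -> Registry:
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"   # private in-memory database
    #             return Registry.createFromConfig(config)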


    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend
        with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding the required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes()).names
        )
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes(components=False)).names
        )
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names
        )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
        )
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
        )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
        )
        # Add a dataset type using a StorageClass that we'll then remove; check
        # that this does not affect our ability to query for dataset types
        # (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")}
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType("temporary", dimensions=["instrument"], storageClass=tempStorageClass,
                                  universe=registry.dimensions)
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registry._registry", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry._registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(parentType, collections=collection,
                                                 instrument="Cam1", detector=1)
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection,
                                         dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            )
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(registry.queryDatasets(
            "bias.wcs",
            collections=collection,
        ))
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2},
            {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [tag1, run2]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"]
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"]
        )
        # Searching for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Searching for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  chain2 searches run2 first, so
        # the chain search should find the same dataset as a direct search of
        # run2.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)
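
    # The savepoint pattern exercised above, in general form (a sketch only;
    # `do_something` and `do_something_risky` are hypothetical stand-ins):
    #
    #     with registry.transaction():              # outer transaction
    #         do_something(registry)                # kept when the block exits
    #         try:
    #             with registry.transaction(savepoint=True):
    #                 do_something_risky(registry)  # rolled back on error
    #         except Exception:
    #             pass                              # outer work still commits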


    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for this test
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twenty", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure",
                                                                      "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit",
                                                                         "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections;
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6*3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10 and detector > 1 and 'DummyCam'=instrument").toSet()
        self.assertEqual(len(rows), 2*2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # expression excludes everything
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit > 1000", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter; it is not in the dimensions, but it is
        # part of the full expression, so it should work too.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="physical_filter = 'dummy_r'", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "band")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "band")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3*4*2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="tract IN (1, 5) AND patch IN (2, 7)", skymap="DummyMap").toSet()
        self.assertEqual(len(rows), 2*2*2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="band = 'i'").toSet()
        self.assertEqual(len(rows), 3*4*1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # expression excludes everything; specifying a non-existing skymap is
        # not a fatal error, just an operator error
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="skymap = 'Mars'").toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.byName(), **dataId2.byName()},
                        graph=graph
                    )
                    for (dataId1, region1), (dataId2, region2)
                    in itertools.product(regions[element1.name].items(), regions[element2.name].items())
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, regions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in regions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.byName()},
                            graph=graph
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known
        bands.  This is tricky because band is
        backed by a query against physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(band="i", universe=registry.dimensions),
             DataCoordinate.standardize(band="r", universe=registry.dimensions)]
        )

    def testAttributeManager(self):
        """Test basic functionality of the attribute manager.
        """
        # Number of attributes with schema versions in a fresh database:
        # 6 managers with 3 records per manager, plus config for dimensions.
        VERSION_COUNT = 6 * 3 + 1

        registry = self.makeRegistry()
        attributes = registry._managers.attributes

        # check what get() returns for a non-existing key
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # cannot store empty key or value
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # set value of a non-existing key
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # update value of an existing key
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # delete an existing key
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # delete a non-existing key
        self.assertFalse(attributes.delete("non-attr"))

        # store a bunch of keys and get the list back
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets
        from collections in the order given.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
1107 

1108 def testQueryResults(self): 

1109 """Test querying for data IDs and then manipulating the QueryResults 

1110 object returned to perform other queries. 

1111 """ 

1112 registry = self.makeRegistry() 

1113 self.loadData(registry, "base.yaml") 

1114 self.loadData(registry, "datasets.yaml") 

1115 bias = registry.getDatasetType("bias") 

1116 flat = registry.getDatasetType("flat") 

1117 # Obtain expected results from methods other than those we're testing 

1118 # here. That includes: 

1119 # - the dimensions of the data IDs we want to query: 

1120 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1121 # - the dimensions of some other data IDs we'll extract from that: 

1122 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1123 # - the data IDs we expect to obtain from the first queries: 

1124 expectedDataIds = DataCoordinateSet( 

1125 { 

1126 DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p, 

1127 universe=registry.dimensions) 

1128 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1129 }, 

1130 graph=expectedGraph, 

1131 hasFull=False, 

1132 hasRecords=False, 

1133 ) 

1134 # - the flat datasets we expect to find from those data IDs, in just 

1135 # one collection (so deduplication is irrelevant): 

1136 expectedFlats = [ 

1137 registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", 

1138 collections="imported_r"), 

1139 registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", 

1140 collections="imported_r"), 

1141 registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", 

1142 collections="imported_r"), 

1143 ] 

1144 # - the data IDs we expect to extract from that: 

1145 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1146 # - the bias datasets we expect to find from those data IDs, after we 

1147 # subset-out the physical_filter dimension, both with duplicates: 

1148 expectedAllBiases = [ 

1149 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1150 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1151 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1152 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1153 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1154 ] 

1155 # - ...and without duplicates: 

1156 expectedDeduplicatedBiases = [ 

1157 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1158 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1159 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1160 ] 

1161 # Test against those expected results, using a "lazy" query for the 

1162 # data IDs (which re-executes that query each time we use it to do 

1163 # something new). 

1164 dataIds = registry.queryDataIds( 

1165 ["detector", "physical_filter"], 

1166 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1167 instrument="Cam1", 

1168 ) 

1169 self.assertEqual(dataIds.graph, expectedGraph) 

1170 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1171 self.assertCountEqual( 

1172 list( 

1173 dataIds.findDatasets( 

1174 flat, 

1175 collections=["imported_r"], 

1176 ) 

1177 ), 

1178 expectedFlats, 

1179 ) 

1180 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1181 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1182 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1183 self.assertCountEqual( 

1184 list( 

1185 subsetDataIds.findDatasets( 

1186 bias, 

1187 collections=["imported_r", "imported_g"], 

1188 findFirst=False 

1189 ) 

1190 ), 

1191 expectedAllBiases 

1192 ) 

1193 self.assertCountEqual( 

1194 list( 

1195 subsetDataIds.findDatasets( 

1196 bias, 

1197 collections=["imported_r", "imported_g"], 

1198 findFirst=True 

1199 ) 

1200 ), expectedDeduplicatedBiases 

1201 ) 

        # Materialize the bias dataset queries (only) by putting the results
        # into temporary tables, then repeat those tests.
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=False).materialize() as biases:
            self.assertCountEqual(list(biases), expectedAllBiases)
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=True).materialize() as biases:
            self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
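        # (materialize() returns a context manager because the results live
        # in a temporary table that has to be cleaned up when the block
        # exits.)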

        # Materialize the data ID subset query, but not the dataset queries.
        with subsetDataIds.materialize() as subsetDataIds:
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False,
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True,
                    )
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize the dataset queries, too.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the original query, but none of the follow-up queries.
        with dataIds.materialize() as dataIds:
            self.assertEqual(dataIds.graph, expectedGraph)
            self.assertEqual(dataIds.toSet(), expectedDataIds)
            self.assertCountEqual(
                list(
                    dataIds.findDatasets(
                        flat,
                        collections=["imported_r"],
                    )
                ),
                expectedFlats,
            )
            subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False,
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True,
                    )
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize just the bias dataset queries.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
            # Materialize the subset data ID query, but not the dataset
            # queries.
            with subsetDataIds.materialize() as subsetDataIds:
                self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
                self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=False,
                        )
                    ),
                    expectedAllBiases,
                )
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=True,
                        )
                    ),
                    expectedDeduplicatedBiases,
                )
                # Materialize the bias dataset queries, too, so now we're
                # materializing every single step.
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=False).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedAllBiases)
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=True).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedDeduplicatedBiases)


    def testEmptyDimensionsQueries(self):
        """Test Query and QueryResults objects in the case where there are no
        dimensions.
        """
        # Set up test data: one dataset type, two runs, one dataset in each.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
        registry.registerDatasetType(schema)
        dataId = DataCoordinate.makeEmpty(registry.dimensions)
        run1 = "run1"
        run2 = "run2"
        registry.registerRun(run1)
        registry.registerRun(run2)
        (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
        (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
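        # (``dataId`` is the unique data ID for the empty dimension graph, so
        # both datasets necessarily share it; only run membership
        # distinguishes them.)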

        # Query directly for both of the datasets together, and then for each
        # one at a time.
        self.assertCountEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
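        # (Reversing the collection search order flips which dataset the
        # find-first query returns.)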

        # Query for data IDs with no dimensions.
        dataIds = registry.queryDataIds([])
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty)
        )
        # Use queried data IDs to find the datasets.
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        # Now materialize the data ID query results and repeat those tests.
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then subset that to get the empty
        # one.  Repeat the above tests starting from that.
        dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty)
        )
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then materialize, then subset to get
        # the empty one.  Repeat again.
        with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
            dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
            with dataIds.materialize() as dataIds:
                self.assertEqual(
                    dataIds.toSequence(),
                    DataCoordinateSequence([dataId], registry.dimensions.empty)
                )
                self.assertCountEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                    [dataset1, dataset2],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                    [dataset1],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                    [dataset2],
                )


    def testCalibrationCollections(self):
        """Test operations on `~CollectionType.CALIBRATION` collections,
        including `Registry.certify`, `Registry.decertify`, and
        `Registry.findDataset`.
        """
        # Set up: make a Registry and fill it with some datasets in
        # non-calibration collections.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Set up some timestamps.
        t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
        t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
        t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
        t4 = astropy.time.Time('2020-01-01T04:00:00', format="isot", scale="tai")
        t5 = astropy.time.Time('2020-01-01T05:00:00', format="isot", scale="tai")
        allTimespans = [
            Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
        ]
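        # (``None`` appears at both ends of the input list above so that the
        # pairs include half-open and fully unbounded timespans; a quick
        # sanity check of that trick:)
        self.assertIn(Timespan(None, None), allTimespans)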

        # Get references to some datasets.
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Register the main calibration collection we'll be working with.
        collection = "Cam1/calibs/default"
        registry.registerCollection(collection, type=CollectionType.CALIBRATION)
        # Cannot associate into a calibration collection (no timespan).
        with self.assertRaises(TypeError):
            registry.associate(collection, [bias2a])
        # Certify the 2a dataset with [t2, t4) validity.
        registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
        # We should not be able to certify 2b with anything overlapping that
        # window.
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
        # We should be able to certify 3a with a range overlapping that
        # window, because it's for a different detector.
        # We'll certify 3a over [t1, t3).
        registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
        # Now we'll certify 2b and 3b together over [t4, ∞).
        registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))

        # Fetch all associations and check that they are what we expect.
        self.assertCountEqual(
            list(
                registry.queryDatasetAssociations(
                    "bias",
                    collections=[collection, "imported_g", "imported_r"],
                )
            ),
            [
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=1,
                                             collections="imported_g"),
                    collection="imported_g",
                    timespan=None,
                ),
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=4,
                                             collections="imported_r"),
                    collection="imported_r",
                    timespan=None,
                ),
                DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
                DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
                DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
                DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
            ]
        )
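        # (Associations in the imported RUN collections carry no validity
        # range, hence timespan=None for those entries above.)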


        class Ambiguous:
            """Tag class to denote lookups that are expected to be ambiguous.
            """
            pass

        def assertLookup(detector: int, timespan: Timespan,
                         expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None:
            """Local function that asserts that a bias lookup returns the
            given expected result.
            """
            if expected is Ambiguous:
                with self.assertRaises(RuntimeError):
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
            else:
                self.assertEqual(
                    expected,
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
                )
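
        # The expectations below all follow from half-open [begin, end)
        # timespan semantics; for example (using only Timespan.overlaps,
        # which this test also exercises later):
        self.assertTrue(Timespan(t2, t4).overlaps(Timespan(t3, t5)))
        self.assertFalse(Timespan(t2, t4).overlaps(Timespan(t4, t5)))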


        # Systematically test lookups against expected results.
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)


        # Decertify [t3, t5) for all data IDs, and run the test lookups
        # again.  This should truncate bias2a to [t2, t3), leave bias3a
        # unchanged at [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
        registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)


        # Decertify everything, this time with explicit data IDs; then check
        # that no lookups succeed.
        registry.decertify(
            collection, "bias", Timespan(None, None),
            dataIds=[
                dict(instrument="Cam1", detector=2),
                dict(instrument="Cam1", detector=3),
            ]
        )
        for detector in (2, 3):
            for timespan in allTimespans:
                assertLookup(detector=detector, timespan=timespan, expected=None)
        # Certify bias2a and bias3a over (-∞, ∞), and check that all lookups
        # return those.
        registry.certify(collection, [bias2a, bias3a], Timespan(None, None))
        for timespan in allTimespans:
            assertLookup(detector=2, timespan=timespan, expected=bias2a)
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
        # Decertify just bias2a over [t2, t4).  This should split a single
        # certification row into two (and leave the other existing row, for
        # bias3a, alone).
        registry.decertify(collection, "bias", Timespan(t2, t4),
                           dataIds=[dict(instrument="Cam1", detector=2)])
        for timespan in allTimespans:
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
            overlapsBefore = timespan.overlaps(Timespan(None, t2))
            overlapsAfter = timespan.overlaps(Timespan(t4, None))
            if overlapsBefore and overlapsAfter:
                expected = Ambiguous
            elif overlapsBefore or overlapsAfter:
                expected = bias2a
            else:
                expected = None
            assertLookup(detector=2, timespan=timespan, expected=expected)
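        # (The loop above treats the decertified [t2, t4) window as a gap:
        # timespans entirely inside it find nothing, those touching only one
        # side find the surviving piece of bias2a, and those spanning both
        # sides are ambiguous.)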


    def testIngestTimeQuery(self):
        """Test that ``ingest_date`` can be used in query expressions.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")

        datasets = list(registry.queryDatasets(..., collections=...))
        len0 = len(datasets)
        self.assertGreater(len0, 0)

        # Everything in the repository was ingested after 2000-01-01, so this
        # cut should keep all of it.
        where = "ingest_date > T'2000-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len1 = len(datasets)
        self.assertEqual(len0, len1)

        # A cut far in the future should match nothing (no one will ever use
        # this piece of software in 30 years).
        where = "ingest_date > T'2050-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len2 = len(datasets)
        self.assertEqual(len2, 0)


    def testTimespanQueries(self):
        """Test query expressions involving timespans.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")
        # All visits in the database, as a mapping from visit ID to timespan.
        visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
        # Just those IDs, sorted (which is also temporal sorting, because HSC
        # visit IDs are monotonically increasing in time).
        ids = sorted(visits.keys())
        self.assertGreater(len(ids), 20)
        # Pick some quasi-random indexes into `ids` to play with.
        i1 = int(len(ids)*0.1)
        i2 = int(len(ids)*0.3)
        i3 = int(len(ids)*0.6)
        i4 = int(len(ids)*0.8)
        # Extract some times from those: just before the beginning of i1
        # (which should be after the end of the previous visit), exactly the
        # beginning of i2, just after the beginning of i3 (and before its
        # end), and the exact end of i4.
        t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
        self.assertGreater(t1, visits[ids[i1 - 1]].end)
        t2 = visits[ids[i2]].begin
        t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
        self.assertLess(t3, visits[ids[i3]].end)
        t4 = visits[ids[i4]].end
        # Make sure those are actually in order.
        self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))

        bind = {
            "t1": t1,
            "t2": t2,
            "t3": t3,
            "t4": t4,
            "ts23": Timespan(t2, t3),
        }
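        # (``bind`` maps names used in the ``where`` expressions below to
        # these Python values, avoiding string interpolation; the one
        # f-string below shows the equivalent inline T'...' time literal.)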


        def query(where):
            """Helper function that queries for visit data IDs and returns
            results as a sorted, deduplicated list of visit IDs.
            """
            return sorted(
                {dataId["visit"] for dataId in registry.queryDataIds("visit",
                                                                     instrument="HSC",
                                                                     bind=bind,
                                                                     where=where)}
            )


        # Try a bunch of timespan queries, mixing up the bounds themselves,
        # where they appear in the expression, and how we get the timespan
        # into the expression.

        # t1 is before the start of i1, so this should not include i1.
        self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
        # t2 is exactly at the start of i2, but ends are exclusive, so these
        # should not include i2.
        self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
        self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
        # t3 is in the middle of i3, so this should include i3.
        self.assertEqual(ids[i2:i3 + 1], query("visit.timespan OVERLAPS ts23"))
        # This one should not include i3, by the same reasoning.
        self.assertEqual(ids[i3 + 1:], query("visit.timespan > (t1, t3)"))
        # t4 is exactly at the end of i4, so this should include i4.
        self.assertEqual(ids[i3:i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
        # i4's upper bound of t4 is exclusive, so this should not include i4.
        self.assertEqual(ids[i4 + 1:], query("visit.timespan OVERLAPS (t4, NULL)"))

        # Now some timespan vs. time scalar queries.
        self.assertEqual(ids[:i2], query("visit.timespan < t2"))
        self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
        self.assertEqual(ids[i3 + 1:], query("visit.timespan > t3"))
        self.assertEqual(ids[i3 + 1:], query("t3 < visit.timespan"))
        self.assertEqual(ids[i3:i3 + 1], query("visit.timespan OVERLAPS t3"))
        self.assertEqual(ids[i3:i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))

        # Empty timespans should not overlap anything.
        self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))


    def testCollectionSummaries(self):
        """Test recording and retrieval of collection summaries.
        """
        self.maxDiff = None
        registry = self.makeRegistry()
        # Importing datasets from YAML should go through the code path where
        # we update collection summaries as we insert datasets.
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        flat = registry.getDatasetType("flat")
        expected1 = CollectionSummary.makeEmpty(registry.dimensions)
        expected1.datasetTypes.add(registry.getDatasetType("bias"))
        expected1.datasetTypes.add(flat)
        expected1.dimensions.update_extract(
            DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)
        )
        self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
        self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
        # Create a chained collection with both of the imported runs; the
        # summary should be the same, because it's a union with itself.
        chain = "chain"
        registry.registerCollection(chain, CollectionType.CHAINED)
        registry.setCollectionChain(chain, ["imported_r", "imported_g"])
        self.assertEqual(registry.getCollectionSummary(chain), expected1)
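        # (A CHAINED collection holds no datasets itself; its summary is the
        # union of its children's summaries, which are identical here.)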

        # Associate just the flats into a tagged collection and certify them
        # into a calibration collection, to check the summaries of those.
        tag = "tag"
        registry.registerCollection(tag, CollectionType.TAGGED)
        registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
        calibs = "calibs"
        registry.registerCollection(calibs, CollectionType.CALIBRATION)
        registry.certify(calibs, registry.queryDatasets(flat, collections="imported_g"),
                         timespan=Timespan(None, None))
        expected2 = expected1.copy()
        expected2.datasetTypes.discard("bias")
        self.assertEqual(registry.getCollectionSummary(tag), expected2)
        self.assertEqual(registry.getCollectionSummary(calibs), expected2)
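        # (``datasetTypes`` is a NamedValueSet, which is why the "bias" entry
        # could be discarded by name above.)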

        # Explicitly calling Registry.refresh() should load those same
        # summaries, via a totally different code path.
        registry.refresh()
        self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
        self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
        self.assertEqual(registry.getCollectionSummary(tag), expected2)
        self.assertEqual(registry.getCollectionSummary(calibs), expected2)


    def testUnrelatedDimensionQueries(self):
        """Test that WHERE expressions in queries can reference dimensions
        that are not in the result set.
        """
        registry = self.makeRegistry()
        # There is no data to back this query, but it should still return
        # zero records instead of raising.
        self.assertFalse(
            set(registry.queryDataIds(["visit", "detector"],
                                      where="instrument='Cam1' AND skymap='not_here' AND tract=0")),
        )