Coverage for python/lsst/daf/butler/registry/tests/_registry.py: 6% (1062 statements)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
from collections import defaultdict
from datetime import datetime, timedelta
import itertools
import logging
import os
import re
from typing import Iterator
import unittest

import astropy.time
import sqlalchemy
from typing import Optional, Type, Union, TYPE_CHECKING

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    ddl,
    Timespan,
)
from ..summaries import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig

from .._exceptions import (
    ConflictingDefinitionError,
    InconsistentDataIdError,
    MissingCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class; if a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class; if a subclass provides a value for
    this member, it overrides the name specified in the default configuration
    (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
        """
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a `RegistryConfig` used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need only the default configuration should just
        instantiate `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the `Registry` instance to be tested.
        """
        raise NotImplementedError()

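    # A minimal sketch of a concrete subclass, kept as a comment because it
    # is illustrative only: it assumes this version of daf_butler provides a
    # ``Registry.createFromConfig`` factory and accepts an in-memory SQLite
    # ``db`` URI (check both before copying), and that a ``data`` directory
    # with the YAML files sits next to the test module:
    #
    #     class SqliteRegistryTests(RegistryTests, unittest.TestCase):
    #
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data")
    #
    #         def makeRegistry(self) -> Registry:
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"  # private in-memory database
    #             return Registry.createFromConfig(config)
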

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend
        with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters.  SQLite documents the limit as 32k, but in
        # practice it appears to be much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, the second has matching elements in different batches
        # (after sorting).
        self.assertEqual(rows[0:2], list(registry.fetchOpaqueData(
            table,
            id=list(range(1000)) + list(range(100, 0, -1)),
            name=["one"] + [f"q{i}" for i in range(2200)] + ["two"])))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

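    # The long-IN-clause cases above rely on the registry splitting huge
    # parameter lists into batches.  A minimal sketch of such batching,
    # assuming the ~1k batch size mentioned in the comments (illustrative
    # only; not used by these tests and not the registry's actual
    # implementation):
    @staticmethod
    def _chunked(values, batchSize=1000):
        # Materialize once so slicing is cheap and len() is well-defined.
        values = list(values)
        for start in range(0, len(values), batchSize):
            # Each slice becomes one IN (...) clause, OR-ed together.
            yield values[start:start + batchSize]
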

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when the definitions are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # A missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding the required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )

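    # A reading aid for the contrast exercised above (behaviour as asserted
    # by this test, not an authoritative API statement):
    # insertDimensionData raises on any duplicate, while syncDimensionData
    # returns True when it inserted, False when an identical record already
    # existed, and raises ConflictingDefinitionError only when the same
    # primary key carries different values.
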

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes()).names
        )
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes(components=False)).names
        )
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names
        )
        # Use a pattern that can match either parents or components.  Again,
        # components are only returned if components=True.
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
        )
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
        )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
        )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")}
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType("temporary", dimensions=["instrument"], storageClass=tempStorageClass,
                                  universe=registry.dimensions)
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)

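    # Summary of the ``components`` modes exercised above, as asserted by
    # this test (a reading aid, not an authoritative API statement):
    #   components=False -> parent dataset types only
    #   components=True  -> parent dataset types plus their components
    #   components=None  -> parents by default, but a component is returned
    #                       when the pattern can only match a component
    #                       (e.g. r"^bias\.wcs")
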

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(parentType, collections=collection,
                                                 instrument="Cam1", detector=1)
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection,
                                         dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            )
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(registry.queryDatasets(
            "bias.wcs",
            collections=collection,
        ))
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2},
            {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time along
        # with a dataset that isn't in the collection and won't cause a
        # conflict.  This should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # The chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [tag1, run2]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"]
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"]
        )
        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run1.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

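    # The assertions above all follow from one rule: a CHAINED collection is
    # searched child by child, in the order stored by setCollectionChain,
    # recursing into nested chains, and the first match wins.  A minimal
    # sketch of that search order, using only methods exercised above
    # (illustrative only; findDataset already does this internally):
    @staticmethod
    def _resolveInChain(registry, datasetType, dataId, collection):
        # Recurse through CHAINED collections in their stored order.
        if registry.getCollectionType(collection) is CollectionType.CHAINED:
            for child in registry.getCollectionChain(collection):
                ref = RegistryTests._resolveInChain(registry, datasetType, dataId, child)
                if ref is not None:
                    return ref
            return None
        # Non-chained collections (RUN, TAGGED) are searched directly.
        return registry.findDataset(datasetType, dataId, collections=collection)
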

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled
        back if an exception propagates out of an inner transaction block and
        is then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)

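    # The nesting above is assumed to map onto standard SQL transaction
    # machinery: the outer ``registry.transaction()`` corresponds to a
    # BEGIN/COMMIT pair, while ``transaction(savepoint=True)`` corresponds to
    # a SAVEPOINT, so the IntegrityError rolls back only to the savepoint
    # (discarding dataId2) and the outer block's insertion of dataId1
    # survives the eventual commit.
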

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins
        to skymap."""
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # Add pre-existing datasets.
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the two collections,
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dimension string works as well as a list of str.
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # With an empty expression.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # The second collection.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # With two input collections.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6*3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # Limit to a single visit.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # A more limiting expression, using link names instead of
        # Table.column.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10 and detector > 1 and 'DummyCam'=instrument").toSet()
        self.assertEqual(len(rows), 2*2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(TypeError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(TypeError):
            registry.queryDataIds(dimensions, collections=run1)

        # An expression that excludes everything.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit > 1000", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter: it is not in the dimensions, but it
        # is a part of the full expression, so it should work too.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="physical_filter = 'dummy_r'", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "band")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "band")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # Add pre-existing datasets.
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # With an empty expression.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3*4*2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # Limit to 2 tracts and 2 patches.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="tract IN (1, 5) AND patch IN (2, 7)", skymap="DummyMap").toSet()
        self.assertEqual(len(rows), 2*2*2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # Limit to a single filter.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="band = 'i'").toSet()
        self.assertEqual(len(rows), 3*4*1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # An expression that excludes everything: specifying a non-existing
        # skymap is not a fatal error, it's an operator error.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="skymap = 'Mars'").toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just
        # be a reasonable change to the default dimension definitions - but
        # the test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct the expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.byName(), **dataId2.byName()},
                        graph=graph
                    )
                    for (dataId1, region1), (dataId2, region2)
                    in itertools.product(regions[element1.name].items(), regions[element2.name].items())
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, elementRegions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in elementRegions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.byName()},
                            graph=graph
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

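    # Note on the second loop above: ``commonSkyPix.pixelization.envelope()``
    # yields ``(begin, end)`` ranges of skypix indices whose pixels may
    # overlap the region, and the equality assertion shows the registry's
    # stored overlap rows are expected to match that envelope-based
    # enumeration exactly, rather than a refined exact-overlap subset of it.
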

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known bands.
        This is tricky because band is backed by a query against
        physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(band="i", universe=registry.dimensions),
             DataCoordinate.standardize(band="r", universe=registry.dimensions)]
        )

    def testAttributeManager(self):
        """Test basic functionality of the attribute manager.
        """
        # Number of attributes with schema versions in a fresh database:
        # 6 managers with 3 records per manager, plus config for dimensions.
        VERSION_COUNT = 6 * 3 + 1

        registry = self.makeRegistry()
        attributes = registry._managers.attributes

        # Check what get() returns for a non-existing key.
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # Cannot store an empty key or value.
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # Set the value of a non-existing key.
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # Updating the value of an existing key requires force=True.
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # Delete an existing key.
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # Delete a non-existing key.
        self.assertFalse(attributes.delete("non-attr"))

        # Store a bunch of keys and get the list back.
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets
        from collections in the order given.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )

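    # As asserted above, ``findFirst=True`` is a per-data-ID "first
    # collection wins" rule: for each data ID, only the dataset from the
    # earliest collection in the given search order is returned, which is
    # why reversing the order flips which run supplies detectors 2 and 3
    # while detectors 1 and 4 (each present in only one run) are unchanged.
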

1195 def testQueryResults(self): 

1196 """Test querying for data IDs and then manipulating the QueryResults 

1197 object returned to perform other queries. 

1198 """ 

1199 registry = self.makeRegistry() 

1200 self.loadData(registry, "base.yaml") 

1201 self.loadData(registry, "datasets.yaml") 

1202 bias = registry.getDatasetType("bias") 

1203 flat = registry.getDatasetType("flat") 

1204 # Obtain expected results from methods other than those we're testing 

1205 # here. That includes: 

1206 # - the dimensions of the data IDs we want to query: 

1207 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1208 # - the dimensions of some other data IDs we'll extract from that: 

1209 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1210 # - the data IDs we expect to obtain from the first queries: 

1211 expectedDataIds = DataCoordinateSet( 

1212 { 

1213 DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p, 

1214 universe=registry.dimensions) 

1215 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1216 }, 

1217 graph=expectedGraph, 

1218 hasFull=False, 

1219 hasRecords=False, 

1220 ) 

1221 # - the flat datasets we expect to find from those data IDs, in just 

1222 # one collection (so deduplication is irrelevant): 

1223 expectedFlats = [ 

1224 registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", 

1225 collections="imported_r"), 

1226 registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", 

1227 collections="imported_r"), 

1228 registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", 

1229 collections="imported_r"), 

1230 ] 

1231 # - the data IDs we expect to extract from that: 

1232 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1233 # - the bias datasets we expect to find from those data IDs, after we 

1234 # subset out the physical_filter dimension, first with duplicates: 

1235 expectedAllBiases = [ 

1236 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1237 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1238 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1239 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1240 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1241 ] 

1242 # - ...and without duplicates: 

1243 expectedDeduplicatedBiases = [ 

1244 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1245 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1246 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1247 ] 

1248 # Test against those expected results, using a "lazy" query for the 

1249 # data IDs (which re-executes that query each time we use it to do 

1250 # something new). 

1251 dataIds = registry.queryDataIds( 

1252 ["detector", "physical_filter"], 

1253 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1254 instrument="Cam1", 

1255 ) 

1256 self.assertEqual(dataIds.graph, expectedGraph) 

1257 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1258 self.assertCountEqual( 

1259 list( 

1260 dataIds.findDatasets( 

1261 flat, 

1262 collections=["imported_r"], 

1263 ) 

1264 ), 

1265 expectedFlats, 

1266 ) 

1267 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1268 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1269 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1270 self.assertCountEqual( 

1271 list( 

1272 subsetDataIds.findDatasets( 

1273 bias, 

1274 collections=["imported_r", "imported_g"], 

1275 findFirst=False 

1276 ) 

1277 ), 

1278 expectedAllBiases 

1279 ) 

1280 self.assertCountEqual( 

1281 list( 

1282 subsetDataIds.findDatasets( 

1283 bias, 

1284 collections=["imported_r", "imported_g"], 

1285 findFirst=True 

1286 ) 

1287 ), expectedDeduplicatedBiases 

1288 ) 

1289 # Materialize the bias dataset queries (only) by putting the results 

1290 # into temporary tables, then repeat those tests. 

1291 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1292 findFirst=False).materialize() as biases: 

1293 self.assertCountEqual(list(biases), expectedAllBiases) 

1294 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1295 findFirst=True).materialize() as biases: 

1296 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1297 # Materialize the data ID subset query, but not the dataset queries. 

1298 with subsetDataIds.materialize() as subsetDataIds: 

1299 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1300 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1301 self.assertCountEqual( 

1302 list( 

1303 subsetDataIds.findDatasets( 

1304 bias, 

1305 collections=["imported_r", "imported_g"], 

1306 findFirst=False 

1307 ) 

1308 ), 

1309 expectedAllBiases 

1310 ) 

1311 self.assertCountEqual( 

1312 list( 

1313 subsetDataIds.findDatasets( 

1314 bias, 

1315 collections=["imported_r", "imported_g"], 

1316 findFirst=True 

1317 ) 

1318 ), expectedDeduplicatedBiases 

1319 ) 

1320 # Materialize the dataset queries, too. 

1321 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1322 findFirst=False).materialize() as biases: 

1323 self.assertCountEqual(list(biases), expectedAllBiases) 

1324 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1325 findFirst=True).materialize() as biases: 

1326 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1327 # Materialize the original query, but none of the follow-up queries. 

1328 with dataIds.materialize() as dataIds: 

1329 self.assertEqual(dataIds.graph, expectedGraph) 

1330 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1331 self.assertCountEqual( 

1332 list( 

1333 dataIds.findDatasets( 

1334 flat, 

1335 collections=["imported_r"], 

1336 ) 

1337 ), 

1338 expectedFlats, 

1339 ) 

1340 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1341 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1342 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1343 self.assertCountEqual( 

1344 list( 

1345 subsetDataIds.findDatasets( 

1346 bias, 

1347 collections=["imported_r", "imported_g"], 

1348 findFirst=False 

1349 ) 

1350 ), 

1351 expectedAllBiases 

1352 ) 

1353 self.assertCountEqual( 

1354 list( 

1355 subsetDataIds.findDatasets( 

1356 bias, 

1357 collections=["imported_r", "imported_g"], 

1358 findFirst=True 

1359 ) 

1360 ), expectedDeduplicatedBiases 

1361 ) 

1362 # Materialize just the bias dataset queries. 

1363 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1364 findFirst=False).materialize() as biases: 

1365 self.assertCountEqual(list(biases), expectedAllBiases) 

1366 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1367 findFirst=True).materialize() as biases: 

1368 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1369 # Materialize the subset data ID query, but not the dataset 

1370 # queries. 

1371 with subsetDataIds.materialize() as subsetDataIds: 

1372 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1373 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1374 self.assertCountEqual( 

1375 list( 

1376 subsetDataIds.findDatasets( 

1377 bias, 

1378 collections=["imported_r", "imported_g"], 

1379 findFirst=False 

1380 ) 

1381 ), 

1382 expectedAllBiases 

1383 ) 

1384 self.assertCountEqual( 

1385 list( 

1386 subsetDataIds.findDatasets( 

1387 bias, 

1388 collections=["imported_r", "imported_g"], 

1389 findFirst=True 

1390 ) 

1391 ), expectedDeduplicatedBiases 

1392 ) 

1393 # Materialize the bias dataset queries, too, so now we're 

1394 # materializing every single step. 

1395 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1396 findFirst=False).materialize() as biases: 

1397 self.assertCountEqual(list(biases), expectedAllBiases) 

1398 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1399 findFirst=True).materialize() as biases: 

1400 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1401 
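# The .materialize() calls above follow the standard context-manager idiom:
# execute the query once into a temporary table, serve all follow-up reads
# from it, and drop it on exit. A minimal sketch of that pattern (editorial
# illustration only; the real implementation uses database temporary tables,
# not a Python list):
import contextlib

@contextlib.contextmanager
def _materialize_model(rows):
    snapshot = list(rows)    # one-time execution / snapshot of the query
    try:
        yield snapshot       # follow-up reads hit the snapshot, not the query
    finally:
        snapshot.clear()     # analogous to dropping the temporary table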

1402 def testEmptyDimensionsQueries(self): 

1403 """Test Query and QueryResults objects in the case where there are no 

1404 dimensions. 

1405 """ 

1406 # Set up test data: one dataset type, two runs, one dataset in each. 

1407 registry = self.makeRegistry() 

1408 self.loadData(registry, "base.yaml") 

1409 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1410 registry.registerDatasetType(schema) 

1411 dataId = DataCoordinate.makeEmpty(registry.dimensions) 

1412 run1 = "run1" 

1413 run2 = "run2" 

1414 registry.registerRun(run1) 

1415 registry.registerRun(run2) 

1416 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1417 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1418 # Query directly for both of the datasets, then for each one individually. 

1419 self.checkQueryResults( 

1420 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), 

1421 [dataset1, dataset2] 

1422 ) 

1423 self.checkQueryResults( 

1424 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1425 [dataset1], 

1426 ) 

1427 self.checkQueryResults( 

1428 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1429 [dataset2], 

1430 ) 

1431 # Query for data IDs with no dimensions. 

1432 dataIds = registry.queryDataIds([]) 

1433 self.checkQueryResults(dataIds, [dataId]) 

1434 # Use queried data IDs to find the datasets. 

1435 self.checkQueryResults( 

1436 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1437 [dataset1, dataset2], 

1438 ) 

1439 self.checkQueryResults( 

1440 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1441 [dataset1], 

1442 ) 

1443 self.checkQueryResults( 

1444 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1445 [dataset2], 

1446 ) 

1447 # Now materialize the data ID query results and repeat those tests. 

1448 with dataIds.materialize() as dataIds: 

1449 self.checkQueryResults(dataIds, [dataId]) 

1450 self.checkQueryResults( 

1451 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1452 [dataset1], 

1453 ) 

1454 self.checkQueryResults( 

1455 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1456 [dataset2], 

1457 ) 

1458 # Query for non-empty data IDs, then subset that to get the empty one. 

1459 # Repeat the above tests starting from that. 

1460 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1461 self.checkQueryResults(dataIds, [dataId]) 

1462 self.checkQueryResults( 

1463 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1464 [dataset1, dataset2], 

1465 ) 

1466 self.checkQueryResults( 

1467 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1468 [dataset1], 

1469 ) 

1470 self.checkQueryResults( 

1471 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1472 [dataset2], 

1473 ) 

1474 with dataIds.materialize() as dataIds: 

1475 self.checkQueryResults(dataIds, [dataId]) 

1476 self.checkQueryResults( 

1477 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1478 [dataset1, dataset2], 

1479 ) 

1480 self.checkQueryResults( 

1481 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1482 [dataset1], 

1483 ) 

1484 self.checkQueryResults( 

1485 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1486 [dataset2], 

1487 ) 

1488 # Query for non-empty data IDs, then materialize, then subset to get 

1489 # the empty one. Repeat again. 

1490 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1491 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1492 self.checkQueryResults(dataIds, [dataId]) 

1493 self.checkQueryResults( 

1494 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1495 [dataset1, dataset2], 

1496 ) 

1497 self.checkQueryResults( 

1498 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1499 [dataset1], 

1500 ) 

1501 self.checkQueryResults( 

1502 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1503 [dataset2], 

1504 ) 

1505 with dataIds.materialize() as dataIds: 

1506 self.checkQueryResults( 

1507 dataIds, 

1508 [dataId] 

1509 ) 

1510 self.checkQueryResults( 

1511 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1512 [dataset1, dataset2], 

1513 ) 

1514 self.checkQueryResults( 

1515 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1516 [dataset1], 

1517 ) 

1518 self.checkQueryResults( 

1519 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1520 [dataset2], 

1521 ) 

1522 

1523 def testDimensionDataModifications(self): 

1524 """Test that modifying dimension records via: 

1525 syncDimensionData(..., update=True) and 

1526 insertDimensionData(..., replace=True) works as expected, even in the 

1527 presence of datasets using those dimensions and spatial overlap 

1528 relationships. 

1529 """ 

1530 

1531 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1532 """Unpack a sphgeom.RangeSet into the integers it contains. 

1533 """ 

1534 for begin, end in ranges: 

1535 yield from range(begin, end) 

1536 

1537 def range_set_hull( 

1538 ranges: lsst.sphgeom.RangeSet, 

1539 pixelization: lsst.sphgeom.HtmPixelization, 

1540 ) -> lsst.sphgeom.ConvexPolygon: 

1541 """Create a ConvexPolygon hull of the region defined by a set of 

1542 HTM pixelization index ranges. 

1543 """ 

1544 points = [] 

1545 for index in unpack_range_set(ranges): 

1546 points.extend(pixelization.triangle(index).getVertices()) 

1547 return lsst.sphgeom.ConvexPolygon(points) 

1548 
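# A quick illustration of unpack_range_set's semantics; it accepts any
# iterable of (begin, end) pairs, not just a sphgeom RangeSet. (This check
# is an editorial addition, not part of the original test.)
assert list(unpack_range_set([(2, 5), (9, 11)])) == [2, 3, 4, 9, 10]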

1549 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1550 # and four child regions (the trixels within the parent at the next 

1551 # level). We'll use the parent as a tract/visit region and the children 

1552 # as its patch/visit_detector regions. 

1553 registry = self.makeRegistry() 

1554 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1555 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1556 index = 12288 

1557 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 

1558 assert htm6.universe().contains(child_ranges_small) 

1559 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)] 

1560 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1561 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1562 ) 

1563 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1564 # Make a larger version of each child region, defined to be the set of 

1565 # htm6 trixels that overlap the original's bounding circle. Make a new 

1566 # parent that's the convex hull of the new children. 

1567 child_regions_large = [ 

1568 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) 

1569 for c in child_regions_small 

1570 ] 

1571 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small)) 

1572 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1573 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1574 ) 

1575 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1576 assert parent_region_large.contains(parent_region_small) 

1577 assert not parent_region_small.contains(parent_region_large) 

1578 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1579 # Find some commonSkyPix indices that overlap the large regions but do 

1580 # not overlap the small regions. We use commonSkyPix here to make sure the 

1581 # real tests later involve what's in the database, not just post-query 

1582 # region filtering. 

1583 child_difference_indices = [] 

1584 for large, small in zip(child_regions_large, child_regions_small): 

1585 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1586 assert difference, "if this is empty, we can't test anything useful with these regions" 

1587 assert all( 

1588 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1589 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1590 for d in difference 

1591 ) 

1592 child_difference_indices.append(difference) 

1593 parent_difference_indices = list( 

1594 unpack_range_set( 

1595 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1596 ) 

1597 ) 

1598 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1599 assert all( 

1600 ( 

1601 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1602 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1603 ) 

1604 for d in parent_difference_indices 

1605 ) 

1606 # Now that we've finally got those regions, we'll insert the large ones 

1607 # as tract/patch dimension records. 

1608 skymap_name = "testing_v1" 

1609 registry.insertDimensionData( 

1610 "skymap", { 

1611 "name": skymap_name, 

1612 "hash": bytes([42]), 

1613 "tract_max": 1, 

1614 "patch_nx_max": 2, 

1615 "patch_ny_max": 2, 

1616 } 

1617 ) 

1618 registry.insertDimensionData( 

1619 "tract", 

1620 {"skymap": skymap_name, "id": 0, "region": parent_region_large} 

1621 ) 

1622 registry.insertDimensionData( 

1623 "patch", 

1624 *[{ 

1625 "skymap": skymap_name, 

1626 "tract": 0, 

1627 "id": n, 

1628 "cell_x": n % 2, 

1629 "cell_y": n // 2, 

1630 "region": c 

1631 } for n, c in enumerate(child_regions_large)] 

1632 ) 

1633 # Add a dataset that uses these dimensions to make sure that modifying 

1634 # them doesn't disrupt foreign keys (we need to make sure the DB doesn't 

1635 # implement insert with replace=True as delete-then-insert). 

1636 dataset_type = DatasetType( 

1637 "coadd", 

1638 dimensions=["tract", "patch"], 

1639 universe=registry.dimensions, 

1640 storageClass="Exposure", 

1641 ) 

1642 registry.registerDatasetType(dataset_type) 

1643 registry.registerCollection("the_run", CollectionType.RUN) 

1644 registry.insertDatasets( 

1645 dataset_type, 

1646 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

1647 run="the_run", 

1648 ) 

1649 # Query for tracts and patches that overlap some "difference" htm9 

1650 # pixels; there should be overlaps, because the database has 

1651 # the "large" suite of regions. 

1652 self.assertEqual( 

1653 {0}, 

1654 { 

1655 data_id["tract"] for data_id in registry.queryDataIds( 

1656 ["tract"], 

1657 skymap=skymap_name, 

1658 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1659 ) 

1660 } 

1661 ) 

1662 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1663 self.assertIn( 

1664 patch_id, 

1665 { 

1666 data_id["patch"] for data_id in registry.queryDataIds( 

1667 ["patch"], 

1668 skymap=skymap_name, 

1669 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1670 ) 

1671 } 

1672 ) 

1673 # Use sync to update the tract region and insert to update the patch 

1674 # regions, to the "small" suite. 

1675 updated = registry.syncDimensionData( 

1676 "tract", 

1677 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

1678 update=True, 

1679 ) 

1680 self.assertEqual(updated, {"region": parent_region_large}) 

1681 registry.insertDimensionData( 

1682 "patch", 

1683 *[{ 

1684 "skymap": skymap_name, 

1685 "tract": 0, 

1686 "id": n, 

1687 "cell_x": n % 2, 

1688 "cell_y": n // 2, 

1689 "region": c 

1690 } for n, c in enumerate(child_regions_small)], 

1691 replace=True 

1692 ) 

1693 # Query again; there now should be no such overlaps, because the 

1694 # database has the "small" suite of regions. 

1695 self.assertFalse( 

1696 set( 

1697 registry.queryDataIds( 

1698 ["tract"], 

1699 skymap=skymap_name, 

1700 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1701 ) 

1702 ) 

1703 ) 

1704 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1705 self.assertNotIn( 

1706 patch_id, 

1707 { 

1708 data_id["patch"] for data_id in registry.queryDataIds( 

1709 ["patch"], 

1710 skymap=skymap_name, 

1711 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1712 ) 

1713 } 

1714 ) 

1715 # Update back to the large regions and query one more time. 

1716 updated = registry.syncDimensionData( 

1717 "tract", 

1718 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

1719 update=True, 

1720 ) 

1721 self.assertEqual(updated, {"region": parent_region_small}) 

1722 registry.insertDimensionData( 

1723 "patch", 

1724 *[{ 

1725 "skymap": skymap_name, 

1726 "tract": 0, 

1727 "id": n, 

1728 "cell_x": n % 2, 

1729 "cell_y": n // 2, 

1730 "region": c 

1731 } for n, c in enumerate(child_regions_large)], 

1732 replace=True 

1733 ) 

1734 self.assertEqual( 

1735 {0}, 

1736 { 

1737 data_id["tract"] for data_id in registry.queryDataIds( 

1738 ["tract"], 

1739 skymap=skymap_name, 

1740 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1741 ) 

1742 } 

1743 ) 

1744 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1745 self.assertIn( 

1746 patch_id, 

1747 { 

1748 data_id["patch"] for data_id in registry.queryDataIds( 

1749 ["patch"], 

1750 skymap=skymap_name, 

1751 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1752 ) 

1753 } 

1754 ) 

1755 
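# A simplified model of the sync-with-update contract exercised above (an
# editorial sketch, not the Registry implementation): on insert the sync
# reports success, and when update=True replaces a differing record it
# returns the old values of the changed fields, as asserted above.
def _sync_model(table, key, new, update=False):
    old = table.get(key)
    if old is None:
        table[key] = dict(new)
        return True                   # inserted a new record
    changed = {k: v for k, v in old.items() if new.get(k) != v}
    if changed and update:
        table[key] = dict(new)
        return changed                # old values of the changed fields
    return False                      # nothing to do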

1756 def testCalibrationCollections(self): 

1757 """Test operations on `~CollectionType.CALIBRATION` collections, 

1758 including `Registry.certify`, `Registry.decertify`, and 

1759 `Registry.findDataset`. 

1760 """ 

1761 # Setup - make a Registry, fill it with some datasets in 

1762 # non-calibration collections. 

1763 registry = self.makeRegistry() 

1764 self.loadData(registry, "base.yaml") 

1765 self.loadData(registry, "datasets.yaml") 

1766 # Set up some timestamps. 

1767 t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai") 

1768 t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai") 

1769 t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai") 

1770 t4 = astropy.time.Time('2020-01-01T04:00:00', format="isot", scale="tai") 

1771 t5 = astropy.time.Time('2020-01-01T05:00:00', format="isot", scale="tai") 

1772 allTimespans = [ 

1773 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

1774 ] 

1775 # Get references to some datasets. 

1776 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

1777 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

1778 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

1779 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

1780 # Register the main calibration collection we'll be working with. 

1781 collection = "Cam1/calibs/default" 

1782 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

1783 # Cannot associate into a calibration collection (no timespan). 

1784 with self.assertRaises(TypeError): 

1785 registry.associate(collection, [bias2a]) 

1786 # Certify 2a dataset with [t2, t4) validity. 

1787 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

1788 # We should not be able to certify 2b with anything overlapping that 

1789 # window. 

1790 with self.assertRaises(ConflictingDefinitionError): 

1791 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

1792 with self.assertRaises(ConflictingDefinitionError): 

1793 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

1794 with self.assertRaises(ConflictingDefinitionError): 

1795 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

1796 with self.assertRaises(ConflictingDefinitionError): 

1797 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

1798 with self.assertRaises(ConflictingDefinitionError): 

1799 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

1800 with self.assertRaises(ConflictingDefinitionError): 

1801 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

1802 with self.assertRaises(ConflictingDefinitionError): 

1803 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

1804 with self.assertRaises(ConflictingDefinitionError): 

1805 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

1806 # We should be able to certify 3a with a range overlapping that window, 

1807 # because it's for a different detector. 

1808 # We'll certify 3a over [t1, t3). 

1809 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

1810 # Now we'll certify 2b and 3b together over [t4, ∞). 

1811 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

1812 

1813 # Fetch all associations and check that they are what we expect. 

1814 self.assertCountEqual( 

1815 list( 

1816 registry.queryDatasetAssociations( 

1817 "bias", 

1818 collections=[collection, "imported_g", "imported_r"], 

1819 ) 

1820 ), 

1821 [ 

1822 DatasetAssociation( 

1823 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1824 collection="imported_g", 

1825 timespan=None, 

1826 ), 

1827 DatasetAssociation( 

1828 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1829 collection="imported_r", 

1830 timespan=None, 

1831 ), 

1832 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

1833 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

1834 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

1835 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

1836 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

1837 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

1838 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

1839 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

1840 ] 

1841 ) 

1842 

1843 class Ambiguous: 

1844 """Tag class to denote lookups that are expected to be ambiguous. 

1845 """ 

1846 pass 

1847 

1848 def assertLookup(detector: int, timespan: Timespan, 

1849 expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None: 

1850 """Local function that asserts that a bias lookup returns the given 

1851 expected result. 

1852 """ 

1853 if expected is Ambiguous: 

1854 with self.assertRaises(RuntimeError): 

1855 registry.findDataset("bias", collections=collection, instrument="Cam1", 

1856 detector=detector, timespan=timespan) 

1857 else: 

1858 self.assertEqual( 

1859 expected, 

1860 registry.findDataset("bias", collections=collection, instrument="Cam1", 

1861 detector=detector, timespan=timespan) 

1862 ) 

1863 

1864 # Systematically test lookups against expected results. 

1865 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

1866 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

1867 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

1868 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

1869 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

1870 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

1871 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

1872 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

1873 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

1874 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

1875 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

1876 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

1877 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

1878 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

1879 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

1880 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

1881 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

1882 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

1883 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

1884 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

1885 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

1886 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

1887 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

1888 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

1889 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

1890 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

1891 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

1892 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

1893 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

1894 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

1895 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

1896 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

1897 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

1898 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

1899 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

1900 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

1901 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

1902 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

1903 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

1904 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

1905 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

1906 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

1907 

1908 # Decertify [t3, t5) for all data IDs, and run the test lookups again. 

1909 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

1910 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

1911 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

1912 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

1913 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

1914 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

1915 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

1916 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

1917 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

1918 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

1919 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

1920 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

1921 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

1922 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

1923 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

1924 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

1925 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

1926 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

1927 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

1928 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

1929 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

1930 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

1931 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

1932 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

1933 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

1934 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

1935 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

1936 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

1937 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

1938 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

1939 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

1940 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

1941 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

1942 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

1943 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

1944 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

1945 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

1946 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

1947 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

1948 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

1949 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

1950 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

1951 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

1952 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

1953 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

1954 

1955 # Decertify everything, this time with explicit data IDs, then check 

1956 # that no lookups succeed. 

1957 registry.decertify( 

1958 collection, "bias", Timespan(None, None), 

1959 dataIds=[ 

1960 dict(instrument="Cam1", detector=2), 

1961 dict(instrument="Cam1", detector=3), 

1962 ] 

1963 ) 

1964 for detector in (2, 3): 

1965 for timespan in allTimespans: 

1966 assertLookup(detector=detector, timespan=timespan, expected=None) 

1967 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

1968 # those. 

1969 registry.certify(collection, [bias2a, bias3a], Timespan(None, None)) 

1970 for timespan in allTimespans: 

1971 assertLookup(detector=2, timespan=timespan, expected=bias2a) 

1972 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

1973 # Decertify just the detector=2 bias over [t2, t4). 

1974 # This should split a single certification row into two (and leave the 

1975 # other existing row, for bias3a, alone). 

1976 registry.decertify(collection, "bias", Timespan(t2, t4), 

1977 dataIds=[dict(instrument="Cam1", detector=2)]) 

1978 for timespan in allTimespans: 

1979 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

1980 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

1981 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

1982 if overlapsBefore and overlapsAfter: 

1983 expected = Ambiguous 

1984 elif overlapsBefore or overlapsAfter: 

1985 expected = bias2a 

1986 else: 

1987 expected = None 

1988 assertLookup(detector=2, timespan=timespan, expected=expected) 

1989 
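# A minimal sketch of the half-open [begin, end) overlap rule that drives
# the certify conflicts and lookups above (editorial illustration only;
# None means unbounded on that side):
def _overlaps_model(a, b):
    a_begin, a_end = a
    b_begin, b_end = b
    a_begins_before_b_ends = a_begin is None or b_end is None or a_begin < b_end
    b_begins_before_a_ends = b_begin is None or a_end is None or b_begin < a_end
    return a_begins_before_b_ends and b_begins_before_a_ends

# e.g. [t2, t4) does not overlap [t4, None) because ends are exclusive,
# which is why certifying bias2b over [t4, None) above did not conflict
# with bias2a's [t2, t4) certification.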

1990 def testSkipCalibs(self): 

1991 """Test how queries handle skipping of calibration collections. 

1992 """ 

1993 registry = self.makeRegistry() 

1994 self.loadData(registry, "base.yaml") 

1995 self.loadData(registry, "datasets.yaml") 

1996 

1997 coll_calib = "Cam1/calibs/default" 

1998 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

1999 

2000 # Add all biases to the calibration collection. 

2001 # Without this, the logic that prunes dataset subqueries based on 

2002 # datasetType-collection summary information will fire before the logic 

2003 # we want to test below. This is a good thing (it avoids the dreaded 

2004 # NotImplementedError a bit more often) everywhere but here. 

2005 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2006 

2007 coll_list = [coll_calib, "imported_g", "imported_r"] 

2008 chain = "Cam1/chain" 

2009 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2010 registry.setCollectionChain(chain, coll_list) 

2011 

2012 # An explicit collection list will raise if findFirst=True or if 

2013 # temporal dimensions are involved. 

2014 with self.assertRaises(NotImplementedError): 

2015 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2016 with self.assertRaises(NotImplementedError): 

2017 registry.queryDataIds(["instrument", "detector", "exposure"], datasets="bias", 

2018 collections=coll_list) 

2019 

2020 # A chained collection will skip the calibration collection. 

2021 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2022 self.assertGreater(len(datasets), 0) 

2023 

2024 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", 

2025 collections=chain)) 

2026 self.assertGreater(len(dataIds), 0) 

2027 

2028 # A glob pattern will skip it too. 

2029 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2030 self.assertGreater(len(datasets), 0) 

2031 

2032 # A regular expression will skip it too. 

2033 pattern = re.compile(".*") 

2034 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2035 self.assertGreater(len(datasets), 0) 

2036 

2037 # Ellipsis should work as usual. 

2038 datasets = list(registry.queryDatasets("bias", collections=...)) 

2039 self.assertGreater(len(datasets), 0) 

2040 

2041 # A few tests with findFirst. 

2042 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2043 self.assertGreater(len(datasets), 0) 

2044 
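# A simplified model of the skipping behavior tested above (editorial
# sketch, not the Registry implementation): a search that expands a CHAINED
# collection or a pattern simply does not visit CALIBRATION collections,
# while naming one explicitly raises NotImplementedError when the query
# would need temporal logic (findFirst=True or temporal dimensions).
def _skip_calibs_model(names, collection_types):
    # Yield only the collections a chain/pattern search would visit.
    for name in names:
        if collection_types[name] != "CALIBRATION":
            yield name

# list(_skip_calibs_model(["calibs", "imported_g"],
#                         {"calibs": "CALIBRATION", "imported_g": "RUN"}))
# == ["imported_g"]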

2045 def testIngestTimeQuery(self): 

2046 """Test query expressions involving the dataset ingest_date field.""" 

2047 registry = self.makeRegistry() 

2048 self.loadData(registry, "base.yaml") 

2049 dt0 = datetime.utcnow() 

2050 self.loadData(registry, "datasets.yaml") 

2051 dt1 = datetime.utcnow() 

2052 

2053 datasets = list(registry.queryDatasets(..., collections=...)) 

2054 len0 = len(datasets) 

2055 self.assertGreater(len0, 0) 

2056 

2057 where = "ingest_date > T'2000-01-01'" 

2058 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2059 len1 = len(datasets) 

2060 self.assertEqual(len0, len1) 

2061 

2062 # no one will ever use this piece of software in 30 years 

2063 where = "ingest_date > T'2050-01-01'" 

2064 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2065 len2 = len(datasets) 

2066 self.assertEqual(len2, 0) 

2067 

2068 # Check more exact timing to make sure there is no 37-second offset 

2069 # (after fixing DM-30124). SQLite time precision is 1 second, so make 

2070 # sure that we don't test with higher precision. 

2071 tests = [ 

2072 # format: (timestamp, operator, expected_len) 

2073 (dt0 - timedelta(seconds=1), ">", len0), 

2074 (dt0 - timedelta(seconds=1), "<", 0), 

2075 (dt1 + timedelta(seconds=1), "<", len0), 

2076 (dt1 + timedelta(seconds=1), ">", 0), 

2077 ] 

2078 for dt, op, expect_len in tests: 

2079 dt_str = dt.isoformat(sep=" ") 

2080 

2081 where = f"ingest_date {op} T'{dt_str}'" 

2082 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2083 self.assertEqual(len(datasets), expect_len) 

2084 

2085 # Same query with bind, using a datetime or an astropy Time value. 

2086 where = f"ingest_date {op} ingest_time" 

2087 datasets = list(registry.queryDatasets(..., collections=..., where=where, 

2088 bind={"ingest_time": dt})) 

2089 self.assertEqual(len(datasets), expect_len) 

2090 

2091 dt_astropy = astropy.time.Time(dt, format="datetime") 

2092 datasets = list(registry.queryDatasets(..., collections=..., where=where, 

2093 bind={"ingest_time": dt_astropy})) 

2094 self.assertEqual(len(datasets), expect_len) 

2095 
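# The T'...' strings above are the query expression grammar's time
# literals. A small helper sketch for building one from a datetime (the
# format is inferred from the usage in this test, so treat it as an
# assumption rather than a documented API; datetime is already imported at
# module scope):
def _time_literal_model(dt):
    return f"T'{dt.isoformat(sep=' ')}'"

# _time_literal_model(datetime(2020, 1, 1, 3)) == "T'2020-01-01 03:00:00'"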

2096 def testTimespanQueries(self): 

2097 """Test query expressions involving timespans. 

2098 """ 

2099 registry = self.makeRegistry() 

2100 self.loadData(registry, "hsc-rc2-subset.yaml") 

2101 # All visits in the database; mapping from ID to timespan. 

2102 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2103 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2104 # visit IDs are monotonically increasing). 

2105 ids = sorted(visits.keys()) 

2106 self.assertGreater(len(ids), 20) 

2107 # Pick some quasi-random indexes into `ids` to play with. 

2108 i1 = int(len(ids)*0.1) 

2109 i2 = int(len(ids)*0.3) 

2110 i3 = int(len(ids)*0.6) 

2111 i4 = int(len(ids)*0.8) 

2112 # Extract some times from those: just before the beginning of i1 (which 

2113 # should be after the end of the exposure before), exactly the 

2114 # beginning of i2, just after the beginning of i3 (and before its end), 

2115 # and the exact end of i4. 

2116 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2117 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2118 t2 = visits[ids[i2]].begin 

2119 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2120 self.assertLess(t3, visits[ids[i3]].end) 

2121 t4 = visits[ids[i4]].end 

2122 # Make sure those are actually in order. 

2123 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2124 

2125 bind = { 

2126 "t1": t1, 

2127 "t2": t2, 

2128 "t3": t3, 

2129 "t4": t4, 

2130 "ts23": Timespan(t2, t3), 

2131 } 

2132 

2133 def query(where): 

2134 """Helper function that queries for visit data IDs and returns 

2135 results as a sorted, deduplicated list of visit IDs. 

2136 """ 

2137 return sorted( 

2138 {dataId["visit"] for dataId in registry.queryDataIds("visit", 

2139 instrument="HSC", 

2140 bind=bind, 

2141 where=where)} 

2142 ) 

2143 

2144 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2145 # where they appear in the expression, and how we get the timespan into 

2146 # the expression. 

2147 

2148 # t1 is before the start of i1, so this should not include i1. 

2149 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2150 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2151 # should not include i2. 

2152 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2153 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2154 # t3 is in the middle of i3, so this should include i3. 

2155 self.assertEqual(ids[i2:i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2156 # This one should not include i3, by the same reasoning. 

2157 self.assertEqual(ids[i3 + 1:], query("visit.timespan > (t1, t3)")) 

2158 # t4 is exactly at the end of i4, so this should include i4. 

2159 self.assertEqual(ids[i3:i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2160 # i4's upper bound of t4 is exclusive, so this should not include i4. 

2161 self.assertEqual(ids[i4 + 1:], query("visit.timespan OVERLAPS (t4, NULL)")) 

2162 

2163 # Now some timespan vs. time scalar queries. 

2164 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2165 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2166 self.assertEqual(ids[i3 + 1:], query("visit.timespan > t3")) 

2167 self.assertEqual(ids[i3 + 1:], query("t3 < visit.timespan")) 

2168 self.assertEqual(ids[i3:i3 + 1], query("visit.timespan OVERLAPS t3")) 

2169 self.assertEqual(ids[i3:i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2170 

2171 # Empty timespans should not overlap anything. 

2172 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 

2173 
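# Why that last query is empty (a sketch of the rule, not the Timespan
# implementation): a half-open interval whose begin is not strictly before
# its end contains no instants, and an empty interval overlaps nothing.
def _is_empty_model(begin, end):
    return begin is not None and end is not None and begin >= end

# _is_empty_model(3, 2) is True, so a (t3, t2) timespan overlaps nothing.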

2174 def testCollectionSummaries(self): 

2175 """Test recording and retrieval of collection summaries. 

2176 """ 

2177 self.maxDiff = None 

2178 registry = self.makeRegistry() 

2179 # Importing datasets from yaml should go through the code path where 

2180 # we update collection summaries as we insert datasets. 

2181 self.loadData(registry, "base.yaml") 

2182 self.loadData(registry, "datasets.yaml") 

2183 flat = registry.getDatasetType("flat") 

2184 expected1 = CollectionSummary.makeEmpty(registry.dimensions) 

2185 expected1.datasetTypes.add(registry.getDatasetType("bias")) 

2186 expected1.datasetTypes.add(flat) 

2187 expected1.dimensions.update_extract( 

2188 DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions) 

2189 ) 

2190 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2191 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2192 # Create a chained collection with both of the imported runs; the 

2193 # summary should be the same, because it's a union with itself. 

2194 chain = "chain" 

2195 registry.registerCollection(chain, CollectionType.CHAINED) 

2196 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2197 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2198 # Associate flats only into a tagged collection and a calibration 

2199 # collection to check summaries of those. 

2200 tag = "tag" 

2201 registry.registerCollection(tag, CollectionType.TAGGED) 

2202 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2203 calibs = "calibs" 

2204 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2205 registry.certify(calibs, registry.queryDatasets(flat, collections="imported_g"), 

2206 timespan=Timespan(None, None)) 

2207 expected2 = expected1.copy() 

2208 expected2.datasetTypes.discard("bias") 

2209 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2210 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2211 # Explicitly calling Registry.refresh() should load those same 

2212 # summaries, via a totally different code path. 

2213 registry.refresh() 

2214 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2215 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2216 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2217 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2218 
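# A simplified model of why the chain's summary equals expected1 above
# (editorial sketch, not the CollectionSummary API): a chained collection's
# summary is the union of its children's summaries, and the union of
# identical sets is unchanged.
def _union_model(child_summaries):
    out = set()
    for summary in child_summaries:
        out |= summary
    return out

# _union_model([{"bias", "flat"}, {"bias", "flat"}]) == {"bias", "flat"}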

2219 def testUnrelatedDimensionQueries(self): 

2220 """Test that WHERE expressions in queries can reference dimensions that 

2221 are not in the result set. 

2222 """ 

2223 registry = self.makeRegistry() 

2224 # There is no data to back this query, but it should still return 

2225 # zero records instead of raising. 

2226 self.assertFalse( 

2227 set(registry.queryDataIds(["visit", "detector"], 

2228 where="instrument='Cam1' AND skymap='not_here' AND tract=0")), 

2229 ) 

2230 

2231 def testBindInQueryDatasets(self): 

2232 """Test that the bind parameter is correctly forwarded in 

2233 queryDatasets recursion. 

2234 """ 

2235 registry = self.makeRegistry() 

2236 # Load the same yaml test data used elsewhere so that there are datasets 

2237 # to query against. 

2238 self.loadData(registry, "base.yaml") 

2239 self.loadData(registry, "datasets.yaml") 

2240 self.assertEqual( 

2241 set(registry.queryDatasets("flat", band="r", collections=...)), 

2242 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2243 ) 

2244 

2245 def testQueryResultSummaries(self): 

2246 """Test summary methods like `count`, `any`, and `explain_no_results` 

2247 on `DataCoordinateQueryResults` and `DatasetQueryResults` 

2248 """ 

2249 registry = self.makeRegistry() 

2250 self.loadData(registry, "base.yaml") 

2251 self.loadData(registry, "datasets.yaml") 

2252 self.loadData(registry, "spatial.yaml") 

2253 # Default test dataset has two collections, each with both flats and 

2254 # biases. Add a new collection with only biases. 

2255 registry.registerCollection("biases", CollectionType.TAGGED) 

2256 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2257 # First query yields two results, and involves no postprocessing. 

2258 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2259 self.assertTrue(query1.any(execute=False, exact=False)) 

2260 self.assertTrue(query1.any(execute=True, exact=False)) 

2261 self.assertTrue(query1.any(execute=True, exact=True)) 

2262 self.assertEqual(query1.count(exact=False), 2) 

2263 self.assertEqual(query1.count(exact=True), 2) 

2264 self.assertFalse(list(query1.explain_no_results())) 

2265 # Second query should yield no results, but this isn't detectable 

2266 # unless we actually run a query. 

2267 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2268 self.assertTrue(query2.any(execute=False, exact=False)) 

2269 self.assertFalse(query2.any(execute=True, exact=False)) 

2270 self.assertFalse(query2.any(execute=True, exact=True)) 

2271 self.assertEqual(query2.count(exact=False), 0) 

2272 self.assertEqual(query2.count(exact=True), 0) 

2273 self.assertFalse(list(query2.explain_no_results())) 

2274 # These queries yield no results due to various problems that can be 

2275 # spotted prior to execution, yielding helpful diagnostics. 

2276 for query, snippets in [ 

2277 ( 

2278 # Dataset type name doesn't match any existing dataset types. 

2279 registry.queryDatasets("nonexistent", collections=...), 

2280 ["nonexistent"], 

2281 ), 

2282 ( 

2283 # Dataset type object isn't registered. 

2284 registry.queryDatasets( 

2285 DatasetType( 

2286 "nonexistent", 

2287 dimensions=["instrument"], 

2288 universe=registry.dimensions, 

2289 storageClass="Image", 

2290 ), 

2291 collections=... 

2292 ), 

2293 ["nonexistent"], 

2294 ), 

2295 ( 

2296 # No datasets of this type in this collection. 

2297 registry.queryDatasets("flat", collections=["biases"]), 

2298 ["flat", "biases"], 

2299 ), 

2300 ( 

2301 # No collections matching at all. 

2302 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2303 ["potato"], 

2304 ), 

2305 ]: 

2306 

2307 self.assertFalse(query.any(execute=False, exact=False)) 

2308 self.assertFalse(query.any(execute=True, exact=False)) 

2309 self.assertFalse(query.any(execute=True, exact=True)) 

2310 self.assertEqual(query.count(exact=False), 0) 

2311 self.assertEqual(query.count(exact=True), 0) 

2312 messages = list(query.explain_no_results()) 

2313 self.assertTrue(messages) 

2314 # Want all expected snippets to appear in at least one message. 

2315 self.assertTrue( 

2316 any( 

2317 all(snippet in message for snippet in snippets) 

2318 for message in query.explain_no_results() 

2319 ), 

2320 messages 

2321 ) 

2322 # This query yields four overlaps in the database, but one is filtered 

2323 # out in postprocessing. The count queries aren't accurate because 

2324 # they don't account for duplication that happens due to an internal 

2325 # join against commonSkyPix. 

2326 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2327 self.assertEqual( 

2328 { 

2329 DataCoordinate.standardize( 

2330 instrument="Cam1", 

2331 skymap="SkyMap1", 

2332 visit=v, 

2333 tract=t, 

2334 universe=registry.dimensions, 

2335 ) 

2336 for v, t in [(1, 0), (2, 0), (2, 1)] 

2337 }, 

2338 set(query3), 

2339 ) 

2340 self.assertTrue(query3.any(execute=False, exact=False)) 

2341 self.assertTrue(query3.any(execute=True, exact=False)) 

2342 self.assertTrue(query3.any(execute=True, exact=True)) 

2343 self.assertGreaterEqual(query3.count(exact=False), 4) 

2344 self.assertGreaterEqual(query3.count(exact=True), 3) 

2345 self.assertFalse(list(query3.explain_no_results())) 

2346 # This query yields overlaps in the database, but all are filtered 

2347 # out in postprocessing. The count queries again aren't very useful. 

2348 # We have to use `where=` here to avoid an optimization that 

2349 # (currently) skips the spatial postprocess-filtering because it 

2350 # recognizes that no spatial join is necessary. That's not ideal, but 

2351 # fixing it is out of scope for this ticket. 

2352 query4 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1", 

2353 where="visit=1 AND detector=1 AND tract=0 AND patch=4") 

2354 self.assertFalse(set(query4)) 

2355 self.assertTrue(query4.any(execute=False, exact=False)) 

2356 self.assertTrue(query4.any(execute=True, exact=False)) 

2357 self.assertFalse(query4.any(execute=True, exact=True)) 

2358 self.assertGreaterEqual(query4.count(exact=False), 1) 

2359 self.assertEqual(query4.count(exact=True), 0) 

2360 messages = list(query4.explain_no_results()) 

2361 self.assertTrue(messages) 

2362 self.assertTrue( 

2363 any( 

2364 "regions did not overlap" in message 

2365 for message in messages 

2366 ) 

2367 )
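# The snippet check above, restated as a stand-alone helper (an editorial
# sketch of the assertion's logic): at least one diagnostic message must
# contain every expected snippet.
def _snippets_match_model(messages, snippets):
    return any(all(snippet in message for snippet in snippets)
               for message in messages)

# _snippets_match_model(["no datasets of type flat in collection biases"],
#                       ["flat", "biases"]) is True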