Coverage for python/lsst/daf/butler/registry/tests/_registry.py: 4% of 1420 statements
(coverage.py v6.5.0, created at 2023-03-22 02:07 -0700)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import RelationalAlgebraError

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    SkyPixDimension,
    StorageClass,
    Timespan,
    ddl,
)
from .._collection_summary import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum
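# "Registry" is imported below only for static type checking; the annotations
# that use it are strings at runtime (via "from __future__ import annotations"
# above), so no runtime import is needed.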

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class. If a subclass provides a value for
    this member, it overrides the name specified in the default configuration
    (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self, share_repo_with: Optional[Registry] = None) -> Optional[Registry]:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with` is
            not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
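        # Note: count() and any() are exercised in addition to plain
        # iteration because the lazy results object may evaluate them through
        # separate query paths.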

        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters. SQLite says the limit is 32k, but it looks
        # like it is much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, the second has matching elements in different batches
        # (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # A missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding the required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the given
        keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, of which two have the right
        # dataset type and one does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets-uuid.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
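        # (A timespan is required when searching a CALIBRATION collection
        # because a given dataset type and data ID may be certified over
        # several validity ranges; the lookup has to pick the one that
        # overlaps the requested timespan.)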

        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset IDs."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # The UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All the different failure modes
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs; they can be re-imported multiple times.

553 with self.subTest(idGenMode=idGenMode): 

554 # Use integer dataset ID to force UUID calculation in _import 

555 ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}") 

556 (ref1,) = registry._importDatasets([ref], idGenerationMode=idGenMode) 

557 self.assertIsInstance(ref1.id, uuid.UUID) 

558 self.assertEqual(ref1.id.version, 5) 

559 
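                # (UUID version 5 is the name-based, SHA-1 variant, so these
                # generation modes are deterministic: the same inputs always
                # reproduce the same dataset ID, as re-imported below.)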

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to a different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "flat", "bias.wcs", "flat.photoCalib"},
                NamedValueSet(registry.queryDatasetTypes(components=True)).names,
            )
        # Use a pattern that can match either parent or components. Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
            )
        # This pattern matches only a component. In this case we also return
        # that component dataset type if components=None.
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
            )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
            )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={
                "data1": registry.storageClasses.getStorageClass("StructuredDataDict"),
                "data2": registry.storageClasses.getStorageClass("StructuredDataDict"),
            },
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertWarns(FutureWarning):
            with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
                everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that. So if the next line
        # fails (i.e. "temporary.data" _is_ in everything.names), it means
        # this part of the test isn't doing anything, because the _unregister
        # call above isn't simulating the real-life case we want it to
        # simulate, in which different versions of daf_butler in entirely
        # different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp". This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        with self.assertWarns(FutureWarning):
            dataIds = registry.queryDataIds(
                ["detector"],
                datasets=["bias.wcs"],
                collections=collection,
            ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        with self.assertWarns(FutureWarning):
            childRefs2 = set(
                registry.queryDatasets(
                    "bias.wcs",
                    collections=collection,
                )
            )
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2}, {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # Searching for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2 (the first element of the
        # chain).
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Searching for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2; it should be found via chain2 as
        # well, since run2 is the first element of that chain.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
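            # The inner block below uses savepoint=True, so its failure rolls
            # back only to the savepoint; the insert above survives.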

            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the different collections;
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dimension string works as well as a list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions),
            )
            self.assertEqual(
                packer2.unpack(packer2.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer2.dimensions),
            )
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
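            # The two packers compress different dimension subsets
            # (visit+detector vs. exposure+detector) into integers, so their
            # packed keys for the same expanded data ID must differ even
            # though each one round-trips through unpack().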

1070 self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111)) 

1071 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11)) 

1072 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3)) 

1073 

1074 # second collection 

1075 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet() 

1076 self.assertEqual(len(rows), 4 * 3) # 4 exposures times 3 detectors 

1077 for dataId in rows: 

1078 self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit")) 

1079 self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201)) 

1080 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20)) 

1081 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5)) 

1082 

1083 # with two input datasets 

1084 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet() 

1085 self.assertEqual(len(set(rows)), 6 * 3) # 6 exposures times 3 detectors; set needed to de-dupe 

1086 for dataId in rows: 

1087 self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit")) 

1088 self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201)) 

1089 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20)) 

1090 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5)) 

1091 

1092 # limit to single visit 

1093 rows = registry.queryDataIds( 

1094 dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam" 

1095 ).toSet() 

1096 self.assertEqual(len(rows), 2 * 3) # 2 exposures times 3 detectors 

1097 self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101)) 

1098 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,)) 

1099 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3)) 

1100 

1101 # more limiting expression, using link names instead of Table.column 

1102 rows = registry.queryDataIds( 

1103 dimensions, 

1104 datasets=rawType, 

1105 collections=run1, 

1106 where="visit = 10 and detector > 1 and 'DummyCam'=instrument", 

1107 ).toSet() 

1108 self.assertEqual(len(rows), 2 * 2) # 2 exposures times 2 detectors 

1109 self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101)) 

1110 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,)) 

1111 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3)) 

1112 

1113 # queryDataIds with only one of `datasets` and `collections` is an 

1114 # error. 

1115 with self.assertRaises(CollectionError): 

1116 registry.queryDataIds(dimensions, datasets=rawType) 

1117 with self.assertRaises(ArgumentError): 

1118 registry.queryDataIds(dimensions, collections=run1) 

1119 

1120 # expression excludes everything 

1121 rows = registry.queryDataIds( 

1122 dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam" 

1123 ).toSet() 

1124 self.assertEqual(len(rows), 0) 

1125 

1126 # Selecting by physical_filter, this is not in the dimensions, but it 

1127 # is a part of the full expression so it should work too. 

1128 rows = registry.queryDataIds( 

1129 dimensions, 

1130 datasets=rawType, 

1131 collections=run1, 

1132 where="physical_filter = 'dummy_r'", 

1133 instrument="DummyCam", 

1134 ).toSet() 

1135 self.assertEqual(len(rows), 2 * 3) # 2 exposures times 3 detectors 

1136 self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111)) 

1137 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,)) 

1138 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3)) 

1139 

1140 def testSkyMapDimensions(self): 

1141 """Tests involving only skymap dimensions, no joins to instrument.""" 

1142 registry = self.makeRegistry() 

1143 

1144 # need a bunch of dimensions and datasets for test, we want 

1145 # "band" in the test so also have to add physical_filter 

1146 # dimensions 

1147 registry.insertDimensionData("instrument", dict(instrument="DummyCam")) 

1148 registry.insertDimensionData( 

1149 "physical_filter", 

1150 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1151 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1152 ) 

1153 registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8"))) 

1154 for tract in range(10): 

1155 registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract)) 

1156 registry.insertDimensionData( 

1157 "patch", 

1158 *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)], 

1159 ) 

1160 

1161 # dataset types 

1162 run = "tésτ" 

1163 registry.registerRun(run) 

1164 storageClass = StorageClass("testDataset") 

1165 registry.storageClasses.registerStorageClass(storageClass) 

1166 calexpType = DatasetType( 

1167 name="deepCoadd_calexp", 

1168 dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")), 

1169 storageClass=storageClass, 

1170 ) 

1171 registry.registerDatasetType(calexpType) 

1172 mergeType = DatasetType( 

1173 name="deepCoadd_mergeDet", 

1174 dimensions=registry.dimensions.extract(("skymap", "tract", "patch")), 

1175 storageClass=storageClass, 

1176 ) 

1177 registry.registerDatasetType(mergeType) 

1178 measType = DatasetType( 

1179 name="deepCoadd_meas", 

1180 dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")), 

1181 storageClass=storageClass, 

1182 ) 

1183 registry.registerDatasetType(measType) 

1184 

1185 dimensions = DimensionGraph( 

1186 registry.dimensions, 

1187 dimensions=( 

1188 calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required 

1189 ), 

1190 ) 

1191 

1192 # add pre-existing datasets 

1193 for tract in (1, 3, 5): 

1194 for patch in (2, 4, 6, 7): 

1195 dataId = dict(skymap="DummyMap", tract=tract, patch=patch) 

1196 registry.insertDatasets(mergeType, dataIds=[dataId], run=run) 

1197 for aFilter in ("i", "r"): 

1198 dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter) 

1199 registry.insertDatasets(calexpType, dataIds=[dataId], run=run) 

1200 

1201 # with empty expression 

1202 rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet() 

1203 self.assertEqual(len(rows), 3 * 4 * 2) # 4 tracts x 4 patches x 2 filters 

1204 for dataId in rows: 

1205 self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band")) 

1206 self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5)) 

1207 self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7)) 

1208 self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r")) 

1209 

1210 # limit to 2 tracts and 2 patches 

1211 rows = registry.queryDataIds( 

1212 dimensions, 

1213 datasets=[calexpType, mergeType], 

1214 collections=run, 

1215 where="tract IN (1, 5) AND patch IN (2, 7)", 

1216 skymap="DummyMap", 

1217 ).toSet() 

1218 self.assertEqual(len(rows), 2 * 2 * 2) # 2 tracts x 2 patches x 2 filters 

1219 self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5)) 

1220 self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7)) 

1221 self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r")) 

1222 

1223 # limit to single filter 

1224 rows = registry.queryDataIds( 

1225 dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'" 

1226 ).toSet() 

1227 self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter

1228 self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5)) 

1229 self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7)) 

1230 self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",)) 

1231 

1232 # Specifying a non-existent skymap raises an exception.

1233 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

1234 rows = registry.queryDataIds( 

1235 dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'" 

1236 ).toSet() 

1237 

1238 def testSpatialJoin(self): 

1239 """Test queries that involve spatial overlap joins.""" 

1240 registry = self.makeRegistry() 

1241 self.loadData(registry, "hsc-rc2-subset.yaml") 

1242 

1243 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of 

1244 # the TopologicalFamily they belong to. We'll relate all elements in 

1245 # each family to all of the elements in each other family. 

1246 families = defaultdict(set) 

1247 # Dictionary of {element.name: {dataId: region}}. 

1248 regions = {} 

1249 for element in registry.dimensions.getDatabaseElements(): 

1250 if element.spatial is not None: 

1251 families[element.spatial.name].add(element) 

1252 regions[element.name] = { 

1253 record.dataId: record.region for record in registry.queryDimensionRecords(element) 

1254 } 

1255 
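# A sketch of the shape these build up to (family and element names here
# are illustrative, not guaranteed by the default dimension universe):
#
#     families == {"skymap_regions": {tract, patch},
#                  "observation_regions": {visit, visit_detector_region}}
#     regions["tract"] == {DataCoordinate(...): lsst.sphgeom.Region, ...}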

1256 # If this check fails, it's not necessarily a problem - it may just be 

1257 # a reasonable change to the default dimension definitions - but the 

1258 # test below depends on there being more than one family to do anything 

1259 # useful. 

1260 self.assertEqual(len(families), 2) 

1261 

1262 # Overlap DatabaseDimensionElements with each other. 

1263 for family1, family2 in itertools.combinations(families, 2): 

1264 for element1, element2 in itertools.product(families[family1], families[family2]): 

1265 graph = DimensionGraph.union(element1.graph, element2.graph) 

1266 # Construct expected set of overlapping data IDs via a 

1267 # brute-force comparison of the regions we've already fetched. 

1268 expected = { 

1269 DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph) 

1270 for (dataId1, region1), (dataId2, region2) in itertools.product( 

1271 regions[element1.name].items(), regions[element2.name].items() 

1272 ) 

1273 if not region1.isDisjointFrom(region2) 

1274 } 

1275 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1276 queried = set(registry.queryDataIds(graph)) 

1277 self.assertEqual(expected, queried) 

1278 

1279 # Overlap each DatabaseDimensionElement with the commonSkyPix system. 

1280 commonSkyPix = registry.dimensions.commonSkyPix 

1281 for elementName, regions in regions.items(): 

1282 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph) 

1283 expected = set() 

1284 for dataId, region in regions.items(): 

1285 for begin, end in commonSkyPix.pixelization.envelope(region): 

1286 expected.update( 

1287 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph) 

1288 for index in range(begin, end) 

1289 ) 

1290 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1291 queried = set(registry.queryDataIds(graph)) 

1292 self.assertEqual(expected, queried) 

1293 
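# For reference, the brute-force predicate above reduces to lsst.sphgeom
# region tests. A minimal, self-contained sketch (trixel index chosen
# arbitrarily inside the level-6 universe):
#
#     import lsst.sphgeom
#     pix = lsst.sphgeom.HtmPixelization(6)
#     trixel = pix.triangle(49152)
#     assert not trixel.isDisjointFrom(trixel)  # a region overlaps itself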

1294 def testAbstractQuery(self): 

1295 """Test that we can run a query that just lists the known 

1296 bands. This is tricky because band is 

1297 backed by a query against physical_filter. 

1298 """ 

1299 registry = self.makeRegistry() 

1300 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1301 registry.insertDimensionData( 

1302 "physical_filter", 

1303 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1304 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1305 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1306 ) 

1307 rows = registry.queryDataIds(["band"]).toSet() 

1308 self.assertCountEqual( 

1309 rows, 

1310 [ 

1311 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1312 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1313 ], 

1314 ) 

1315 

1316 def testAttributeManager(self): 

1317 """Test basic functionality of attribute manager.""" 

1318 # number of attributes with schema versions in a fresh database, 

1319 # 6 managers with 3 records per manager, plus config for dimensions 

1320 VERSION_COUNT = 6 * 3 + 1 
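# (i.e. VERSION_COUNT == 19 rows before this test adds anything)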

1321 

1322 registry = self.makeRegistry() 

1323 attributes = registry._managers.attributes 

1324 

1325 # check what get() returns for non-existing key 

1326 self.assertIsNone(attributes.get("attr")) 

1327 self.assertEqual(attributes.get("attr", ""), "") 

1328 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1329 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1330 

1331 # cannot store empty key or value 

1332 with self.assertRaises(ValueError): 

1333 attributes.set("", "value") 

1334 with self.assertRaises(ValueError): 

1335 attributes.set("attr", "") 

1336 

1337 # set value of non-existing key 

1338 attributes.set("attr", "value") 

1339 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1340 self.assertEqual(attributes.get("attr"), "value") 

1341 

1342 # update value of existing key 

1343 with self.assertRaises(ButlerAttributeExistsError): 

1344 attributes.set("attr", "value2") 

1345 

1346 attributes.set("attr", "value2", force=True) 

1347 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1348 self.assertEqual(attributes.get("attr"), "value2") 

1349 

1350 # delete existing key 

1351 self.assertTrue(attributes.delete("attr")) 

1352 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1353 

1354 # delete non-existing key 

1355 self.assertFalse(attributes.delete("non-attr")) 

1356 

1357 # store a bunch of keys and get the list back

1358 data = [ 

1359 ("version.core", "1.2.3"), 

1360 ("version.dimensions", "3.2.1"), 

1361 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1362 ] 

1363 for key, value in data: 

1364 attributes.set(key, value) 

1365 items = dict(attributes.items()) 

1366 for key, value in data: 

1367 self.assertEqual(items[key], value) 

1368 

1369 def testQueryDatasetsDeduplication(self): 

1370 """Test that the findFirst option to queryDatasets selects datasets 

1371 from collections in the order given.

1372 """ 

1373 registry = self.makeRegistry() 

1374 self.loadData(registry, "base.yaml") 

1375 self.loadData(registry, "datasets.yaml") 

1376 self.assertCountEqual( 

1377 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1378 [ 

1379 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1380 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1381 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1382 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1383 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1384 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1385 ], 

1386 ) 

1387 self.assertCountEqual( 

1388 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1389 [ 

1390 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1391 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1392 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1393 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1394 ], 

1395 ) 

1396 self.assertCountEqual( 

1397 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1398 [ 

1399 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1400 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1401 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1402 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1403 ], 

1404 ) 

1405 
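# Conceptually, findFirst=True resolves each data ID against the
# collection list in order; a minimal sketch of that rule, where
# datasets_in() is a hypothetical helper rather than a registry API:
#
#     first_match = {}
#     for collection in ["imported_g", "imported_r"]:
#         for ref in datasets_in(collection):
#             first_match.setdefault(ref.dataId, ref)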

1406 def testQueryResults(self): 

1407 """Test querying for data IDs and then manipulating the QueryResults 

1408 object returned to perform other queries. 

1409 """ 

1410 registry = self.makeRegistry() 

1411 self.loadData(registry, "base.yaml") 

1412 self.loadData(registry, "datasets.yaml") 

1413 bias = registry.getDatasetType("bias") 

1414 flat = registry.getDatasetType("flat") 

1415 # Obtain expected results from methods other than those we're testing 

1416 # here. That includes: 

1417 # - the dimensions of the data IDs we want to query: 

1418 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1419 # - the dimensions of some other data IDs we'll extract from that: 

1420 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1421 # - the data IDs we expect to obtain from the first queries: 

1422 expectedDataIds = DataCoordinateSet( 

1423 { 

1424 DataCoordinate.standardize( 

1425 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1426 ) 

1427 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1428 }, 

1429 graph=expectedGraph, 

1430 hasFull=False, 

1431 hasRecords=False, 

1432 ) 

1433 # - the flat datasets we expect to find from those data IDs, in just 

1434 # one collection (so deduplication is irrelevant): 

1435 expectedFlats = [ 

1436 registry.findDataset( 

1437 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1438 ), 

1439 registry.findDataset( 

1440 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1441 ), 

1442 registry.findDataset( 

1443 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1444 ), 

1445 ] 

1446 # - the data IDs we expect to extract from that: 

1447 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1448 # - the bias datasets we expect to find from those data IDs, after we 

1449 # subset out the physical_filter dimension, first with duplicates:

1450 expectedAllBiases = [ 

1451 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1452 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1453 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1454 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1455 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1456 ] 

1457 # - ...and without duplicates: 

1458 expectedDeduplicatedBiases = [ 

1459 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1460 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1461 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1462 ] 

1463 # Test against those expected results, using a "lazy" query for the 

1464 # data IDs (which re-executes that query each time we use it to do 

1465 # something new). 

1466 dataIds = registry.queryDataIds( 

1467 ["detector", "physical_filter"], 

1468 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1469 instrument="Cam1", 

1470 ) 

1471 self.assertEqual(dataIds.graph, expectedGraph) 

1472 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1473 self.assertCountEqual( 

1474 list( 

1475 dataIds.findDatasets( 

1476 flat, 

1477 collections=["imported_r"], 

1478 ) 

1479 ), 

1480 expectedFlats, 

1481 ) 

1482 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1483 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1484 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1485 self.assertCountEqual( 

1486 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1487 expectedAllBiases, 

1488 ) 

1489 self.assertCountEqual( 

1490 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1491 expectedDeduplicatedBiases, 

1492 ) 

1493 

1494 # Check dimensions match. 

1495 with self.assertRaises(ValueError): 

1496 subsetDataIds.findDatasets("flat", collections=["imported_r", "imported_g"], findFirst=True) 

1497 

1498 # Use a component dataset type. 

1499 self.assertCountEqual( 

1500 [ 

1501 ref.makeComponentRef("image") 

1502 for ref in subsetDataIds.findDatasets( 

1503 bias, 

1504 collections=["imported_r", "imported_g"], 

1505 findFirst=False, 

1506 ) 

1507 ], 

1508 [ref.makeComponentRef("image") for ref in expectedAllBiases], 

1509 ) 

1510 

1511 # Use a named dataset type that does not exist and a dataset type 

1512 # object that does not exist. 

1513 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure") 

1514 

1515 # Test both string name and dataset type object. 

1516 test_type: Union[str, DatasetType] 

1517 for test_type, test_type_name in ( 

1518 (unknown_type, unknown_type.name), 

1519 (unknown_type.name, unknown_type.name), 

1520 ): 

1521 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name): 

1522 list( 

1523 subsetDataIds.findDatasets( 

1524 test_type, collections=["imported_r", "imported_g"], findFirst=True 

1525 ) 

1526 ) 

1527 

1528 # Materialize the bias dataset queries (only) by putting the results 

1529 # into temporary tables, then repeat those tests. 

1530 with subsetDataIds.findDatasets( 

1531 bias, collections=["imported_r", "imported_g"], findFirst=False 

1532 ).materialize() as biases: 

1533 self.assertCountEqual(list(biases), expectedAllBiases) 

1534 with subsetDataIds.findDatasets( 

1535 bias, collections=["imported_r", "imported_g"], findFirst=True 

1536 ).materialize() as biases: 

1537 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1538 # Materialize the data ID subset query, but not the dataset queries. 

1539 with subsetDataIds.materialize() as subsetDataIds: 

1540 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1541 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1542 self.assertCountEqual( 

1543 list( 

1544 subsetDataIds.findDatasets( 

1545 bias, collections=["imported_r", "imported_g"], findFirst=False 

1546 ) 

1547 ), 

1548 expectedAllBiases, 

1549 ) 

1550 self.assertCountEqual( 

1551 list( 

1552 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1553 ), 

1554 expectedDeduplicatedBiases, 

1555 ) 

1556 # Materialize the dataset queries, too. 

1557 with subsetDataIds.findDatasets( 

1558 bias, collections=["imported_r", "imported_g"], findFirst=False 

1559 ).materialize() as biases: 

1560 self.assertCountEqual(list(biases), expectedAllBiases) 

1561 with subsetDataIds.findDatasets( 

1562 bias, collections=["imported_r", "imported_g"], findFirst=True 

1563 ).materialize() as biases: 

1564 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1565 # Materialize the original query, but none of the follow-up queries. 

1566 with dataIds.materialize() as dataIds: 

1567 self.assertEqual(dataIds.graph, expectedGraph) 

1568 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1569 self.assertCountEqual( 

1570 list( 

1571 dataIds.findDatasets( 

1572 flat, 

1573 collections=["imported_r"], 

1574 ) 

1575 ), 

1576 expectedFlats, 

1577 ) 

1578 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1579 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1580 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1581 self.assertCountEqual( 

1582 list( 

1583 subsetDataIds.findDatasets( 

1584 bias, collections=["imported_r", "imported_g"], findFirst=False 

1585 ) 

1586 ), 

1587 expectedAllBiases, 

1588 ) 

1589 self.assertCountEqual( 

1590 list( 

1591 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1592 ), 

1593 expectedDeduplicatedBiases, 

1594 ) 

1595 # Materialize just the bias dataset queries. 

1596 with subsetDataIds.findDatasets( 

1597 bias, collections=["imported_r", "imported_g"], findFirst=False 

1598 ).materialize() as biases: 

1599 self.assertCountEqual(list(biases), expectedAllBiases) 

1600 with subsetDataIds.findDatasets( 

1601 bias, collections=["imported_r", "imported_g"], findFirst=True 

1602 ).materialize() as biases: 

1603 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1604 # Materialize the subset data ID query, but not the dataset 

1605 # queries. 

1606 with subsetDataIds.materialize() as subsetDataIds: 

1607 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1608 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1609 self.assertCountEqual( 

1610 list( 

1611 subsetDataIds.findDatasets( 

1612 bias, collections=["imported_r", "imported_g"], findFirst=False 

1613 ) 

1614 ), 

1615 expectedAllBiases, 

1616 ) 

1617 self.assertCountEqual( 

1618 list( 

1619 subsetDataIds.findDatasets( 

1620 bias, collections=["imported_r", "imported_g"], findFirst=True 

1621 ) 

1622 ), 

1623 expectedDeduplicatedBiases, 

1624 ) 

1625 # Materialize the bias dataset queries, too, so now we're 

1626 # materializing every single step. 

1627 with subsetDataIds.findDatasets( 

1628 bias, collections=["imported_r", "imported_g"], findFirst=False 

1629 ).materialize() as biases: 

1630 self.assertCountEqual(list(biases), expectedAllBiases) 

1631 with subsetDataIds.findDatasets( 

1632 bias, collections=["imported_r", "imported_g"], findFirst=True 

1633 ).materialize() as biases: 

1634 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1635 
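# For reference, the materialize() pattern exercised above is a context
# manager that backs the results with a temporary table, so repeated use
# avoids re-running the query; a minimal sketch:
#
#     with registry.queryDataIds(["detector"]).materialize() as data_ids:
#         data_ids.toSet()  # served from the temporary table
#         data_ids.toSet()  # again, without re-executing the query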

1636 def testStorageClassPropagation(self): 

1637 """Test that queries for datasets respect the storage class passed in 

1638 as part of a full dataset type. 

1639 """ 

1640 registry = self.makeRegistry() 

1641 self.loadData(registry, "base.yaml") 

1642 dataset_type_in_registry = DatasetType( 

1643 "tbl", dimensions=["instrument"], storageClass="DataFrame", universe=registry.dimensions 

1644 ) 

1645 registry.registerDatasetType(dataset_type_in_registry) 

1646 run = "run1" 

1647 registry.registerRun(run) 

1648 (inserted_ref,) = registry.insertDatasets( 

1649 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run 

1650 ) 

1651 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry) 

1652 query_dataset_type = DatasetType( 

1653 "tbl", dimensions=["instrument"], storageClass="ArrowAstropy", universe=registry.dimensions 

1654 ) 

1655 self.assertNotEqual(dataset_type_in_registry, query_dataset_type) 

1656 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run]) 

1657 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore 

1658 (query_datasets_ref,) = query_datasets_result 

1659 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type) 

1660 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets( 

1661 query_dataset_type, collections=[run] 

1662 ) 

1663 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type) 

1664 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result 

1665 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type) 

1666 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type) 

1667 self.assertEqual(list(query_dataset_types_result), [query_dataset_type]) 

1668 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run]) 

1669 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type) 

1670 
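# In short, a DatasetType with the same name and dimensions but a
# different storage class overrides the storage class on every ref the
# query returns; a sketch of the pattern tested above:
#
#     query_type = DatasetType(
#         "tbl", dimensions=["instrument"], storageClass="ArrowAstropy",
#         universe=registry.dimensions
#     )
#     refs = registry.queryDatasets(query_type, collections=[run])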

1671 def testEmptyDimensionsQueries(self): 

1672 """Test Query and QueryResults objects in the case where there are no 

1673 dimensions. 

1674 """ 

1675 # Set up test data: one dataset type, two runs, one dataset in each. 

1676 registry = self.makeRegistry() 

1677 self.loadData(registry, "base.yaml") 

1678 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1679 registry.registerDatasetType(schema) 

1680 dataId = DataCoordinate.makeEmpty(registry.dimensions) 

1681 run1 = "run1" 

1682 run2 = "run2" 

1683 registry.registerRun(run1) 

1684 registry.registerRun(run2) 

1685 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1686 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1687 # Query directly for both of the datasets, and each one, one at a time. 

1688 self.checkQueryResults( 

1689 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1690 ) 

1691 self.checkQueryResults( 

1692 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1693 [dataset1], 

1694 ) 

1695 self.checkQueryResults( 

1696 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1697 [dataset2], 

1698 ) 

1699 # Query for data IDs with no dimensions. 

1700 dataIds = registry.queryDataIds([]) 

1701 self.checkQueryResults(dataIds, [dataId]) 

1702 # Use queried data IDs to find the datasets. 

1703 self.checkQueryResults( 

1704 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1705 [dataset1, dataset2], 

1706 ) 

1707 self.checkQueryResults( 

1708 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1709 [dataset1], 

1710 ) 

1711 self.checkQueryResults( 

1712 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1713 [dataset2], 

1714 ) 

1715 # Now materialize the data ID query results and repeat those tests. 

1716 with dataIds.materialize() as dataIds: 

1717 self.checkQueryResults(dataIds, [dataId]) 

1718 self.checkQueryResults( 

1719 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1720 [dataset1], 

1721 ) 

1722 self.checkQueryResults( 

1723 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1724 [dataset2], 

1725 ) 

1726 # Query for non-empty data IDs, then subset that to get the empty one. 

1727 # Repeat the above tests starting from that. 

1728 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1729 self.checkQueryResults(dataIds, [dataId]) 

1730 self.checkQueryResults( 

1731 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1732 [dataset1, dataset2], 

1733 ) 

1734 self.checkQueryResults( 

1735 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1736 [dataset1], 

1737 ) 

1738 self.checkQueryResults( 

1739 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1740 [dataset2], 

1741 ) 

1742 with dataIds.materialize() as dataIds: 

1743 self.checkQueryResults(dataIds, [dataId]) 

1744 self.checkQueryResults( 

1745 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1746 [dataset1, dataset2], 

1747 ) 

1748 self.checkQueryResults( 

1749 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1750 [dataset1], 

1751 ) 

1752 self.checkQueryResults( 

1753 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1754 [dataset2], 

1755 ) 

1756 # Query for non-empty data IDs, then materialize, then subset to get 

1757 # the empty one. Repeat again. 

1758 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1759 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1760 self.checkQueryResults(dataIds, [dataId]) 

1761 self.checkQueryResults( 

1762 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1763 [dataset1, dataset2], 

1764 ) 

1765 self.checkQueryResults( 

1766 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1767 [dataset1], 

1768 ) 

1769 self.checkQueryResults( 

1770 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1771 [dataset2], 

1772 ) 

1773 with dataIds.materialize() as dataIds: 

1774 self.checkQueryResults(dataIds, [dataId]) 

1775 self.checkQueryResults( 

1776 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1777 [dataset1, dataset2], 

1778 ) 

1779 self.checkQueryResults( 

1780 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1781 [dataset1], 

1782 ) 

1783 self.checkQueryResults( 

1784 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1785 [dataset2], 

1786 ) 

1787 # Query for non-empty data IDs with a constraint on an empty-data-ID 

1788 # dataset that exists. 

1789 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...) 

1790 self.checkQueryResults( 

1791 dataIds.subset(unique=True), 

1792 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)], 

1793 ) 

1794 # Again query for non-empty data IDs with a constraint on empty-data-ID 

1795 # datasets, but when the datasets don't exist. We delete the existing 

1796 # dataset and query just that collection rather than creating a new 

1797 # empty collection because this is a bit less likely for our build-time 

1798 # logic to shortcut-out (via the collection summaries), and such a 

1799 # shortcut would make this test a bit more trivial than we'd like. 

1800 registry.removeDatasets([dataset2]) 

1801 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2) 

1802 self.checkQueryResults(dataIds, []) 

1803 
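# For reference, DataCoordinate.makeEmpty(universe) is the unique data ID
# with no dimensions, so the empty-dimension queries above can only ever
# return it; a sketch:
#
#     empty = DataCoordinate.makeEmpty(registry.dimensions)
#     assert not empty.keys()  # no dimensions at all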

1804 def testDimensionDataModifications(self): 

1805 """Test that modifying dimension records via: 

1806 syncDimensionData(..., update=True) and 

1807 insertDimensionData(..., replace=True) works as expected, even in the 

1808 presence of datasets using those dimensions and spatial overlap 

1809 relationships. 

1810 """ 

1811 

1812 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1813 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1814 for begin, end in ranges: 

1815 yield from range(begin, end) 

1816 

1817 def range_set_hull( 

1818 ranges: lsst.sphgeom.RangeSet, 

1819 pixelization: lsst.sphgeom.HtmPixelization, 

1820 ) -> lsst.sphgeom.ConvexPolygon: 

1821 """Create a ConvexPolygon hull of the region defined by a set of 

1822 HTM pixelization index ranges. 

1823 """ 

1824 points = [] 

1825 for index in unpack_range_set(ranges): 

1826 points.extend(pixelization.triangle(index).getVertices()) 

1827 return lsst.sphgeom.ConvexPolygon(points) 

1828 
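# For example, assuming lsst.sphgeom.RangeSet(begin, end) denotes the
# half-open range [begin, end):
#
#     list(unpack_range_set(lsst.sphgeom.RangeSet(8, 11))) == [8, 9, 10]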

1829 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1830 # and four child regions (the trixels within the parent at the next 

1831 # level). We'll use the parent as a tract/visit region and the children

1832 # as its patch/visit_detector regions. 

1833 registry = self.makeRegistry() 

1834 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1835 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1836 index = 12288 

1837 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 

1838 assert htm6.universe().contains(child_ranges_small) 

1839 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)] 

1840 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1841 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1842 ) 

1843 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1844 # Make a larger version of each child region, defined to be the set of 

1845 # htm6 trixels that overlap the original's bounding circle. Make a new 

1846 # parent that's the convex hull of the new children. 

1847 child_regions_large = [ 

1848 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1849 ] 

1850 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small)) 

1851 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1852 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1853 ) 

1854 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1855 assert parent_region_large.contains(parent_region_small) 

1856 assert not parent_region_small.contains(parent_region_large) 

1857 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1858 # Find some commonSkyPix indices that overlap the large regions but do not

1859 # overlap the small regions. We use commonSkyPix here to make sure the 

1860 # real tests later involve what's in the database, not just post-query 

1861 # filtering of regions. 

1862 child_difference_indices = [] 

1863 for large, small in zip(child_regions_large, child_regions_small): 

1864 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1865 assert difference, "if this is empty, we can't test anything useful with these regions" 

1866 assert all( 

1867 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1868 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1869 for d in difference 

1870 ) 

1871 child_difference_indices.append(difference) 

1872 parent_difference_indices = list( 

1873 unpack_range_set( 

1874 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1875 ) 

1876 ) 

1877 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1878 assert all( 

1879 ( 

1880 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1881 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1882 ) 

1883 for d in parent_difference_indices 

1884 ) 

1885 # Now that we've finally got those regions, we'll insert the large ones 

1886 # as tract/patch dimension records. 

1887 skymap_name = "testing_v1" 

1888 registry.insertDimensionData( 

1889 "skymap", 

1890 { 

1891 "name": skymap_name, 

1892 "hash": bytes([42]), 

1893 "tract_max": 1, 

1894 "patch_nx_max": 2, 

1895 "patch_ny_max": 2, 

1896 }, 

1897 ) 

1898 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

1899 registry.insertDimensionData( 

1900 "patch", 

1901 *[ 

1902 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1903 for n, c in enumerate(child_regions_large) 

1904 ], 

1905 ) 

1906 # Add a dataset that uses these dimensions to make sure that modifying

1907 # them doesn't disrupt foreign keys (we need to make sure the DB doesn't

1908 # implement insert with replace=True as delete-then-insert).

1909 dataset_type = DatasetType( 

1910 "coadd", 

1911 dimensions=["tract", "patch"], 

1912 universe=registry.dimensions, 

1913 storageClass="Exposure", 

1914 ) 

1915 registry.registerDatasetType(dataset_type) 

1916 registry.registerCollection("the_run", CollectionType.RUN) 

1917 registry.insertDatasets( 

1918 dataset_type, 

1919 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

1920 run="the_run", 

1921 ) 

1922 # Query for tracts and patches that overlap some "difference"

1923 # commonSkyPix pixels; there should be overlaps, because the database has

1924 # the "large" suite of regions. 

1925 self.assertEqual( 

1926 {0}, 

1927 { 

1928 data_id["tract"] 

1929 for data_id in registry.queryDataIds( 

1930 ["tract"], 

1931 skymap=skymap_name, 

1932 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1933 ) 

1934 }, 

1935 ) 

1936 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1937 self.assertIn( 

1938 patch_id, 

1939 { 

1940 data_id["patch"] 

1941 for data_id in registry.queryDataIds( 

1942 ["patch"], 

1943 skymap=skymap_name, 

1944 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1945 ) 

1946 }, 

1947 ) 

1948 # Use sync to update the tract region and insert to update the regions 

1949 # of the patches, to the "small" suite. 

1950 updated = registry.syncDimensionData( 

1951 "tract", 

1952 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

1953 update=True, 

1954 ) 

1955 self.assertEqual(updated, {"region": parent_region_large}) 

1956 registry.insertDimensionData( 

1957 "patch", 

1958 *[ 

1959 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1960 for n, c in enumerate(child_regions_small) 

1961 ], 

1962 replace=True, 

1963 ) 

1964 # Query again; there now should be no such overlaps, because the 

1965 # database has the "small" suite of regions. 

1966 self.assertFalse( 

1967 set( 

1968 registry.queryDataIds( 

1969 ["tract"], 

1970 skymap=skymap_name, 

1971 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1972 ) 

1973 ) 

1974 ) 

1975 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1976 self.assertNotIn( 

1977 patch_id, 

1978 { 

1979 data_id["patch"] 

1980 for data_id in registry.queryDataIds( 

1981 ["patch"], 

1982 skymap=skymap_name, 

1983 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1984 ) 

1985 }, 

1986 ) 

1987 # Update back to the large regions and query one more time. 

1988 updated = registry.syncDimensionData( 

1989 "tract", 

1990 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

1991 update=True, 

1992 ) 

1993 self.assertEqual(updated, {"region": parent_region_small}) 

1994 registry.insertDimensionData( 

1995 "patch", 

1996 *[ 

1997 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1998 for n, c in enumerate(child_regions_large) 

1999 ], 

2000 replace=True, 

2001 ) 

2002 self.assertEqual( 

2003 {0}, 

2004 { 

2005 data_id["tract"] 

2006 for data_id in registry.queryDataIds( 

2007 ["tract"], 

2008 skymap=skymap_name, 

2009 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

2010 ) 

2011 }, 

2012 ) 

2013 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

2014 self.assertIn( 

2015 patch_id, 

2016 { 

2017 data_id["patch"] 

2018 for data_id in registry.queryDataIds( 

2019 ["patch"], 

2020 skymap=skymap_name, 

2021 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

2022 ) 

2023 }, 

2024 ) 

2025 
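# Note the contract exercised above: syncDimensionData(..., update=True)
# returns a mapping from each updated field to its previous value, e.g.
# {"region": parent_region_large} after shrinking the tract region.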

2026 def testCalibrationCollections(self): 

2027 """Test operations on `~CollectionType.CALIBRATION` collections, 

2028 including `Registry.certify`, `Registry.decertify`, and 

2029 `Registry.findDataset`. 

2030 """ 

2031 # Setup - make a Registry, fill it with some datasets in 

2032 # non-calibration collections. 

2033 registry = self.makeRegistry() 

2034 self.loadData(registry, "base.yaml") 

2035 self.loadData(registry, "datasets.yaml") 

2036 # Set up some timestamps. 

2037 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

2038 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

2039 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

2040 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

2041 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

2042 allTimespans = [ 

2043 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

2044 ] 

2045 # Get references to some datasets. 

2046 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

2047 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

2048 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

2049 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

2050 # Register the main calibration collection we'll be working with. 

2051 collection = "Cam1/calibs/default" 

2052 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

2053 # Cannot associate into a calibration collection (no timespan). 

2054 with self.assertRaises(CollectionTypeError): 

2055 registry.associate(collection, [bias2a]) 

2056 # Certify 2a dataset with [t2, t4) validity. 

2057 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

2058 # Test that we can query for this dataset via the new collection, both 

2059 # on its own and with a RUN collection, as long as we don't try to join 

2060 # in temporal dimensions or use findFirst=True. 

2061 self.assertEqual( 

2062 set(registry.queryDatasets("bias", findFirst=False, collections=collection)), 

2063 {bias2a}, 

2064 ) 

2065 self.assertEqual( 

2066 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])), 

2067 { 

2068 bias2a, 

2069 bias2b, 

2070 bias3b, 

2071 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2072 }, 

2073 ) 

2074 self.assertEqual( 

2075 set(registry.queryDataIds("detector", datasets="bias", collections=collection)), 

2076 {registry.expandDataId(instrument="Cam1", detector=2)}, 

2077 ) 

2078 self.assertEqual( 

2079 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])), 

2080 { 

2081 registry.expandDataId(instrument="Cam1", detector=2), 

2082 registry.expandDataId(instrument="Cam1", detector=3), 

2083 registry.expandDataId(instrument="Cam1", detector=4), 

2084 }, 

2085 ) 

2086 

2087 # We should not be able to certify 2b with anything overlapping that 

2088 # window. 

2089 with self.assertRaises(ConflictingDefinitionError): 

2090 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

2091 with self.assertRaises(ConflictingDefinitionError): 

2092 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

2093 with self.assertRaises(ConflictingDefinitionError): 

2094 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

2095 with self.assertRaises(ConflictingDefinitionError): 

2096 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

2097 with self.assertRaises(ConflictingDefinitionError): 

2098 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

2099 with self.assertRaises(ConflictingDefinitionError): 

2100 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

2101 with self.assertRaises(ConflictingDefinitionError): 

2102 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

2103 with self.assertRaises(ConflictingDefinitionError): 

2104 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

2105 # We should be able to certify 3a with a range overlapping that window, 

2106 # because it's for a different detector. 

2107 # We'll certify 3a over [t1, t3). 

2108 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

2109 # Now we'll certify 2b and 3b together over [t4, ∞). 

2110 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

2111 

2112 # Fetch all associations and check that they are what we expect. 

2113 self.assertCountEqual( 

2114 list( 

2115 registry.queryDatasetAssociations( 

2116 "bias", 

2117 collections=[collection, "imported_g", "imported_r"], 

2118 ) 

2119 ), 

2120 [ 

2121 DatasetAssociation( 

2122 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

2123 collection="imported_g", 

2124 timespan=None, 

2125 ), 

2126 DatasetAssociation( 

2127 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2128 collection="imported_r", 

2129 timespan=None, 

2130 ), 

2131 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

2132 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

2133 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

2134 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

2135 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

2136 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

2137 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2138 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2139 ], 

2140 ) 

2141 

2142 class Ambiguous: 

2143 """Tag class to denote lookups that should be ambiguous.""" 

2144 

2145 pass 

2146 

2147 def assertLookup( 

2148 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]] 

2149 ) -> None: 

2150 """Local function that asserts that a bias lookup returns the given 

2151 expected result. 

2152 """ 

2153 if expected is Ambiguous: 

2154 with self.assertRaises((DatasetTypeError, LookupError)): 

2155 registry.findDataset( 

2156 "bias", 

2157 collections=collection, 

2158 instrument="Cam1", 

2159 detector=detector, 

2160 timespan=timespan, 

2161 ) 

2162 else: 

2163 self.assertEqual( 

2164 expected, 

2165 registry.findDataset( 

2166 "bias", 

2167 collections=collection, 

2168 instrument="Cam1", 

2169 detector=detector, 

2170 timespan=timespan, 

2171 ), 

2172 ) 

2173 

2174 # Systematically test lookups against expected results. 

2175 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2176 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2177 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2178 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2179 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

2180 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2181 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2182 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2183 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2184 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

2185 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2186 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2187 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2188 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

2189 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2190 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

2191 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

2192 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

2193 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

2194 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2195 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2196 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2197 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2198 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2199 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2200 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

2201 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2202 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2203 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2204 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2205 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

2206 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2207 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2208 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2209 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2210 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2211 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2212 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2213 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2214 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2215 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2216 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2217 

2218 # Decertify [t3, t5) for all data IDs, and do test lookups again. 

2219 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2220 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2221 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2222 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2223 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2224 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2225 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2226 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2227 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2228 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2229 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2230 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2231 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2232 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2233 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2234 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2235 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2236 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2237 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2238 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2239 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2240 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2241 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2242 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2243 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2244 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2245 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2246 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2247 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2248 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2249 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2250 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2251 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2252 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2253 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2254 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2255 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2256 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2257 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2258 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2259 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2260 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2261 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2262 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2263 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2264 

2265 # Decertify everything, this time with explicit data IDs, then check 

2266 # that no lookups succeed. 

2267 registry.decertify( 

2268 collection, 

2269 "bias", 

2270 Timespan(None, None), 

2271 dataIds=[ 

2272 dict(instrument="Cam1", detector=2), 

2273 dict(instrument="Cam1", detector=3), 

2274 ], 

2275 ) 

2276 for detector in (2, 3): 

2277 for timespan in allTimespans: 

2278 assertLookup(detector=detector, timespan=timespan, expected=None) 

2279 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2280 # those. 

2281 registry.certify( 

2282 collection, 

2283 [bias2a, bias3a], 

2284 Timespan(None, None), 

2285 ) 

2286 for timespan in allTimespans: 

2287 assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2288 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2289 # Decertify just bias2 over [t2, t4). 

2290 # This should split a single certification row into two (and leave the 

2291 # other existing row, for bias3a, alone). 

2292 registry.decertify( 

2293 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2294 ) 

2295 for timespan in allTimespans: 

2296 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2297 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2298 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2299 if overlapsBefore and overlapsAfter: 

2300 expected = Ambiguous 

2301 elif overlapsBefore or overlapsAfter: 

2302 expected = bias2a 

2303 else: 

2304 expected = None 

2305 assertLookup(detector=2, timespan=timespan, expected=expected) 

2306 
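# Timespans are half-open, [begin, end); a minimal sketch of the overlap
# rule the lookups above rely on (same t2 < t3 < t4 ordering):
#
#     assert not Timespan(t2, t4).overlaps(Timespan(t4, None))  # touching
#     assert not Timespan(None, t2).overlaps(Timespan(t2, t4))
#     assert Timespan(t2, t4).overlaps(Timespan(t3, None))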

2307 def testSkipCalibs(self): 

2308 """Test how queries handle skipping of calibration collections.""" 

2309 registry = self.makeRegistry() 

2310 self.loadData(registry, "base.yaml") 

2311 self.loadData(registry, "datasets.yaml") 

2312 

2313 coll_calib = "Cam1/calibs/default" 

2314 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2315 

2316 # Add all biases to the calibration collection. 

2317 # Without this, the logic that prunes dataset subqueries based on 

2318 # datasetType-collection summary information will fire before the logic 

2319 # we want to test below. This is a good thing (it avoids the dreaded 

2320 # NotImplementedError a bit more often) everywhere but here. 

2321 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2322 

2323 coll_list = [coll_calib, "imported_g", "imported_r"] 

2324 chain = "Cam1/chain" 

2325 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2326 registry.setCollectionChain(chain, coll_list) 

2327 

2328 # An explicit collection list raises if findFirst=True or if temporal

2329 # dimensions are involved.

2330 with self.assertRaises(NotImplementedError): 

2331 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2332 with self.assertRaises(NotImplementedError): 

2333 registry.queryDataIds( 

2334 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2335 ).count() 

2336 

2337 # chain will skip 

2338 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2339 self.assertGreater(len(datasets), 0) 

2340 

2341 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2342 self.assertGreater(len(dataIds), 0) 

2343 

2344 # glob will skip too 

2345 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2346 self.assertGreater(len(datasets), 0) 

2347 

2348 # regular expression will skip too 

2349 pattern = re.compile(".*") 

2350 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2351 self.assertGreater(len(datasets), 0) 

2352 

2353 # ellipsis should work as usual 

2354 datasets = list(registry.queryDatasets("bias", collections=...)) 

2355 self.assertGreater(len(datasets), 0) 

2356 

2357 # few tests with findFirst 

2358 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2359 self.assertGreater(len(datasets), 0) 

2360 
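# Summary of the collection expressions exercised above; all of them
# skip calibration collections rather than fail, except the explicit
# list:
#
#     registry.queryDatasets("bias", collections=chain)             # chained
#     registry.queryDatasets("bias", collections="*d*")             # glob
#     registry.queryDatasets("bias", collections=re.compile(".*"))  # regex
#     registry.queryDatasets("bias", collections=...)               # everything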

2361 def testIngestTimeQuery(self): 

2362 registry = self.makeRegistry() 

2363 self.loadData(registry, "base.yaml") 

2364 dt0 = datetime.utcnow() 

2365 self.loadData(registry, "datasets.yaml") 

2366 dt1 = datetime.utcnow() 

2367 

2368 datasets = list(registry.queryDatasets(..., collections=...)) 

2369 len0 = len(datasets) 

2370 self.assertGreater(len0, 0) 

2371 

2372 where = "ingest_date > T'2000-01-01'" 

2373 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2374 len1 = len(datasets) 

2375 self.assertEqual(len0, len1) 

2376 

2377 # no one will ever use this piece of software in 30 years 

2378 where = "ingest_date > T'2050-01-01'" 

2379 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2380 len2 = len(datasets) 

2381 self.assertEqual(len2, 0) 

2382 

2383 # Check more exact timing to make sure there is no 37 seconds offset 

2384 # (after fixing DM-30124). SQLite time precision is 1 second, so make

2385 # sure that we don't test with higher precision. 

2386 tests = [ 

2387 # format: (timestamp, operator, expected_len) 

2388 (dt0 - timedelta(seconds=1), ">", len0), 

2389 (dt0 - timedelta(seconds=1), "<", 0), 

2390 (dt1 + timedelta(seconds=1), "<", len0), 

2391 (dt1 + timedelta(seconds=1), ">", 0), 

2392 ] 

2393 for dt, op, expect_len in tests: 

2394 dt_str = dt.isoformat(sep=" ") 

2395 

2396 where = f"ingest_date {op} T'{dt_str}'" 

2397 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2398 self.assertEqual(len(datasets), expect_len) 

2399 

2400 # same with bind using datetime or astropy Time 

2401 where = f"ingest_date {op} ingest_time" 

2402 datasets = list( 

2403 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2404 ) 

2405 self.assertEqual(len(datasets), expect_len) 

2406 

2407 dt_astropy = astropy.time.Time(dt, format="datetime") 

2408 datasets = list( 

2409 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2410 ) 

2411 self.assertEqual(len(datasets), expect_len) 

2412 
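# For reference, time literals in registry `where` strings use the
# T'...' form (ISO, as exercised above), and bound values may be either
# datetime or astropy.time.Time:
#
#     where = "ingest_date > T'2050-01-01'"
#     registry.queryDatasets(..., collections=..., where=where)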

2413 def testTimespanQueries(self): 

2414 """Test query expressions involving timespans.""" 

2415 registry = self.makeRegistry() 

2416 self.loadData(registry, "hsc-rc2-subset.yaml") 

2417 # All visits in the database; mapping from ID to timespan.

2418 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2419 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2420 # visit IDs are monotonically increasing).

2421 ids = sorted(visits.keys()) 

2422 self.assertGreater(len(ids), 20) 

2423 # Pick some quasi-random indexes into `ids` to play with. 

2424 i1 = int(len(ids) * 0.1) 

2425 i2 = int(len(ids) * 0.3) 

2426 i3 = int(len(ids) * 0.6) 

2427 i4 = int(len(ids) * 0.8) 

2428 # Extract some times from those: just before the beginning of i1 (which 

2429 # should be after the end of the previous visit), exactly the

2430 # beginning of i2, just after the beginning of i3 (and before its end), 

2431 # and the exact end of i4. 

2432 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2433 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2434 t2 = visits[ids[i2]].begin 

2435 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2436 self.assertLess(t3, visits[ids[i3]].end) 

2437 t4 = visits[ids[i4]].end 

2438 # Make sure those are actually in order. 

2439 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2440 

2441 bind = { 

2442 "t1": t1, 

2443 "t2": t2, 

2444 "t3": t3, 

2445 "t4": t4, 

2446 "ts23": Timespan(t2, t3), 

2447 } 

2448 

2449 def query(where): 

2450 """Helper function that queries for visit data IDs and returns 

2451 results as a sorted, deduplicated list of visit IDs. 

2452 """ 

2453 return sorted( 

2454 { 

2455 dataId["visit"] 

2456 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2457 } 

2458 ) 

2459 

2460 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2461 # where they appear in the expression, and how we get the timespan into 

2462 # the expression. 

2463 

2464 # t1 is before the start of i1, so this should not include i1. 

2465 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2466 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2467 # should not include i2. 

2468 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2469 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2470 # t3 is in the middle of i3, so this should include i3. 

2471 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2472 # This one should not include i3 by the same reasoning.

2473 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2474 # t4 is exactly at the end of i4, so this should include i4. 

2475 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2476 # i4's upper bound of t4 is exclusive, so this should not include i4.

2477 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2478 

2479 # Now some timespan vs. time scalar queries. 

2480 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2481 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2482 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2483 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2484 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2485 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2486 

2487 # Empty timespans should not overlap anything. 

2488 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 
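
    # Illustrative sketch of the half-open interval semantics exercised
    # above: a Timespan includes its begin point but excludes its end point,
    # so two spans that merely touch at a boundary do not overlap. This
    # assumes `Timespan.overlaps`, the in-Python counterpart of the OVERLAPS
    # operator used in the query expressions.
    @staticmethod
    def _example_timespan_semantics() -> None:
        t0 = astropy.time.Time("2021-01-01 00:00:00", scale="tai")
        t1 = astropy.time.Time("2021-01-01 01:00:00", scale="tai")
        t2 = astropy.time.Time("2021-01-01 02:00:00", scale="tai")
        assert not Timespan(t0, t1).overlaps(Timespan(t1, t2))  # touch only
        assert Timespan(t0, t2).overlaps(Timespan(t1, t2))
        assert Timespan(None, None).overlaps(Timespan(t0, t1))  # unbounded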

2489 

2490 def testCollectionSummaries(self): 

2491 """Test recording and retrieval of collection summaries.""" 

2492 self.maxDiff = None 

2493 registry = self.makeRegistry() 

2494 # Importing datasets from yaml should go through the code path where 

2495 # we update collection summaries as we insert datasets. 

2496 self.loadData(registry, "base.yaml") 

2497 self.loadData(registry, "datasets.yaml") 

2498 flat = registry.getDatasetType("flat") 

2499 expected1 = CollectionSummary() 

2500 expected1.dataset_types.add(registry.getDatasetType("bias")) 

2501 expected1.add_data_ids( 

2502 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)] 

2503 ) 

2504 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2505 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2506 # Create a chained collection with both of the imported runs; the 

2507 # summary should be the same, because it's a union with itself. 

2508 chain = "chain" 

2509 registry.registerCollection(chain, CollectionType.CHAINED) 

2510 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2511 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2512 # Associate flats only into a tagged collection and a calibration 

2513 # collection to check summaries of those. 

2514 tag = "tag" 

2515 registry.registerCollection(tag, CollectionType.TAGGED) 

2516 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2517 calibs = "calibs" 

2518 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2519 registry.certify( 

2520 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2521 ) 

2522 expected2 = expected1.copy() 

2523 expected2.dataset_types.discard("bias") 

2524 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2525 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2526 # Explicitly calling Registry.refresh() should load those same 

2527 # summaries, via a totally different code path. 

2528 registry.refresh() 

2529 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2530 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2531 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2532 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 
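
    # Illustrative sketch: a collection summary is a cheap, conservative
    # answer to "could this collection contain datasets of type X?", useful
    # for pruning searches without running a full query. This assumes the
    # NamedValueSet-style `dataset_types.names` interface seen above.
    @staticmethod
    def _example_summary_check(registry: Registry) -> bool:
        summary = registry.getCollectionSummary("imported_g")
        # True means the collection *may* contain biases; False rules it out.
        return "bias" in summary.dataset_types.names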

2533 

2534 def testBindInQueryDatasets(self): 

2535 """Test that the bind parameter is correctly forwarded in 

2536 queryDatasets recursion. 

2537 """ 

2538 registry = self.makeRegistry() 

2539 # Importing datasets from yaml should go through the code path where 

2540 # we update collection summaries as we insert datasets. 

2541 self.loadData(registry, "base.yaml") 

2542 self.loadData(registry, "datasets.yaml") 

2543 self.assertEqual( 

2544 set(registry.queryDatasets("flat", band="r", collections=...)), 

2545 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2546 ) 

2547 

2548 def testQueryIntRangeExpressions(self): 

2549 """Test integer range expressions in ``where`` arguments. 

2550 

2551 Note that our expressions use inclusive stop values, unlike Python's. 

2552 """ 

2553 registry = self.makeRegistry() 

2554 self.loadData(registry, "base.yaml") 

2555 self.assertEqual( 

2556 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")), 

2557 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]}, 

2558 ) 

2559 self.assertEqual( 

2560 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")), 

2561 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]}, 

2562 ) 

2563 self.assertEqual( 

2564 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")), 

2565 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]}, 

2566 ) 
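
    # Illustrative note: the range syntax is "IN (start..stop:stride)" with
    # an *inclusive* stop, so the Python equivalent needs stop + 1.
    @staticmethod
    def _example_range_equivalent() -> None:
        # "detector IN (1..4:2)" selects detectors 1 and 3, i.e.:
        assert list(range(1, 4 + 1, 2)) == [1, 3]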

2567 

2568 def testQueryResultSummaries(self): 

2569 """Test summary methods like `count`, `any`, and `explain_no_results` 

2570 on `DataCoordinateQueryResults` and `DatasetQueryResults`.

2571 """ 

2572 registry = self.makeRegistry() 

2573 self.loadData(registry, "base.yaml") 

2574 self.loadData(registry, "datasets.yaml") 

2575 self.loadData(registry, "spatial.yaml") 

2576 # Default test dataset has two collections, each with both flats and 

2577 # biases. Add a new collection with only biases. 

2578 registry.registerCollection("biases", CollectionType.TAGGED) 

2579 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2580 # First query yields two results, and involves no postprocessing. 

2581 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2582 self.assertTrue(query1.any(execute=False, exact=False)) 

2583 self.assertTrue(query1.any(execute=True, exact=False)) 

2584 self.assertTrue(query1.any(execute=True, exact=True)) 

2585 self.assertEqual(query1.count(exact=False), 2) 

2586 self.assertEqual(query1.count(exact=True), 2) 

2587 self.assertFalse(list(query1.explain_no_results())) 

2588 # Second query should yield no results, which we should see when 

2589 # we attempt to expand the data ID. 

2590 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2591 # There's no execute=False, exact=False test here because the behavior

2592 # is not something we want to guarantee in this case (and exact=False

2593 # says either answer is legal). 

2594 self.assertFalse(query2.any(execute=True, exact=False)) 

2595 self.assertFalse(query2.any(execute=True, exact=True)) 

2596 self.assertEqual(query2.count(exact=False), 0) 

2597 self.assertEqual(query2.count(exact=True), 0) 

2598 self.assertTrue(list(query2.explain_no_results())) 

2599 # These queries yield no results due to various problems that can be 

2600 # spotted prior to execution, yielding helpful diagnostics. 

2601 base_query = registry.queryDataIds(["detector", "physical_filter"]) 

2602 queries_and_snippets = [ 

2603 ( 

2604 # Dataset type name doesn't match any existing dataset types. 

2605 registry.queryDatasets("nonexistent", collections=...), 

2606 ["nonexistent"], 

2607 ), 

2608 ( 

2609 # Dataset type object isn't registered. 

2610 registry.queryDatasets( 

2611 DatasetType( 

2612 "nonexistent", 

2613 dimensions=["instrument"], 

2614 universe=registry.dimensions, 

2615 storageClass="Image", 

2616 ), 

2617 collections=..., 

2618 ), 

2619 ["nonexistent"], 

2620 ), 

2621 ( 

2622 # No datasets of this type in this collection. 

2623 registry.queryDatasets("flat", collections=["biases"]), 

2624 ["flat", "biases"], 

2625 ), 

2626 ( 

2627 # No datasets of this type in this collection. 

2628 base_query.findDatasets("flat", collections=["biases"]), 

2629 ["flat", "biases"], 

2630 ), 

2631 ( 

2632 # No collections matching at all. 

2633 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2634 ["potato"], 

2635 ), 

2636 ] 

2637 # The behavior of these additional queries is slated to change in the 

2638 # future, so we also check for deprecation warnings. 

2639 with self.assertWarns(FutureWarning): 

2640 queries_and_snippets.append( 

2641 ( 

2642 # Dataset type name doesn't match any existing dataset 

2643 # types. 

2644 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...), 

2645 ["nonexistent"], 

2646 ) 

2647 ) 

2648 with self.assertWarns(FutureWarning): 

2649 queries_and_snippets.append( 

2650 ( 

2651 # Dataset type name doesn't match any existing dataset 

2652 # types. 

2653 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...), 

2654 ["nonexistent"], 

2655 ) 

2656 ) 

2657 for query, snippets in queries_and_snippets: 

2658 self.assertFalse(query.any(execute=False, exact=False)) 

2659 self.assertFalse(query.any(execute=True, exact=False)) 

2660 self.assertFalse(query.any(execute=True, exact=True)) 

2661 self.assertEqual(query.count(exact=False), 0) 

2662 self.assertEqual(query.count(exact=True), 0) 

2663 messages = list(query.explain_no_results()) 

2664 self.assertTrue(messages) 

2665 # Want all expected snippets to appear in at least one message. 

2666 self.assertTrue( 

2667 any( 

2668 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2669 ), 

2670 messages, 

2671 ) 

2672 

2673 # This query does yield results, but should also emit a warning because 

2674 # passing dataset type patterns to queryDataIds is deprecated; just

2675 # look for the warning.

2676 with self.assertWarns(FutureWarning): 

2677 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...) 

2678 

2679 # These queries yield no results due to problems that can be identified 

2680 # by cheap follow-up queries, yielding helpful diagnostics. 

2681 for query, snippets in [ 

2682 ( 

2683 # No records for one of the involved dimensions. 

2684 registry.queryDataIds(["subfilter"]), 

2685 ["no rows", "subfilter"], 

2686 ), 

2687 ( 

2688 # No records for one of the involved dimensions. 

2689 registry.queryDimensionRecords("subfilter"), 

2690 ["no rows", "subfilter"], 

2691 ), 

2692 ]: 

2693 self.assertFalse(query.any(execute=True, exact=False)) 

2694 self.assertFalse(query.any(execute=True, exact=True)) 

2695 self.assertEqual(query.count(exact=True), 0) 

2696 messages = list(query.explain_no_results()) 

2697 self.assertTrue(messages) 

2698 # Want all expected snippets to appear in at least one message. 

2699 self.assertTrue( 

2700 any( 

2701 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2702 ), 

2703 messages, 

2704 ) 

2705 

2706 # This query yields four overlaps in the database, but one is filtered 

2707 # out in postprocessing. The count queries aren't accurate because 

2708 # they don't account for duplication that happens due to an internal 

2709 # join against commonSkyPix. 

2710 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2711 self.assertEqual( 

2712 { 

2713 DataCoordinate.standardize( 

2714 instrument="Cam1", 

2715 skymap="SkyMap1", 

2716 visit=v, 

2717 tract=t, 

2718 universe=registry.dimensions, 

2719 ) 

2720 for v, t in [(1, 0), (2, 0), (2, 1)] 

2721 }, 

2722 set(query3), 

2723 ) 

2724 self.assertTrue(query3.any(execute=False, exact=False)) 

2725 self.assertTrue(query3.any(execute=True, exact=False)) 

2726 self.assertTrue(query3.any(execute=True, exact=True)) 

2727 self.assertGreaterEqual(query3.count(exact=False), 4) 

2728 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3) 

2729 self.assertFalse(list(query3.explain_no_results())) 

2730 # This query yields overlaps in the database, but all are filtered 

2731 # out in postprocessing. The count queries again aren't very useful. 

2732 # We have to use `where=` here to avoid an optimization that 

2733 # (currently) skips the spatial postprocess-filtering because it 

2734 # recognizes that no spatial join is necessary. That's not ideal, but 

2735 # fixing it is out of scope for this ticket. 

2736 query4 = registry.queryDataIds( 

2737 ["visit", "tract"], 

2738 instrument="Cam1", 

2739 skymap="SkyMap1", 

2740 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2741 ) 

2742 self.assertFalse(set(query4)) 

2743 self.assertTrue(query4.any(execute=False, exact=False)) 

2744 self.assertTrue(query4.any(execute=True, exact=False)) 

2745 self.assertFalse(query4.any(execute=True, exact=True)) 

2746 self.assertGreaterEqual(query4.count(exact=False), 1) 

2747 self.assertEqual(query4.count(exact=True, discard=True), 0) 

2748 messages = query4.explain_no_results() 

2749 self.assertTrue(messages) 

2750 self.assertTrue(any("overlap" in message for message in messages)) 

2751 # This query should yield results from one dataset type but not the 

2752 # other, which is not registered. 

2753 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"]) 

2754 self.assertTrue(set(query5)) 

2755 self.assertTrue(query5.any(execute=False, exact=False)) 

2756 self.assertTrue(query5.any(execute=True, exact=False)) 

2757 self.assertTrue(query5.any(execute=True, exact=True)) 

2758 self.assertGreaterEqual(query5.count(exact=False), 1) 

2759 self.assertGreaterEqual(query5.count(exact=True), 1) 

2760 self.assertFalse(list(query5.explain_no_results())) 

2761 # This query applies a selection that yields no results, fully in the 

2762 # database. Explaining why it fails involves traversing the relation 

2763 # tree and running a LIMIT 1 query at each level that has the potential 

2764 # to remove rows. 

2765 query6 = registry.queryDimensionRecords( 

2766 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1" 

2767 ) 

2768 self.assertEqual(query6.count(exact=True), 0) 

2769 messages = query6.explain_no_results() 

2770 self.assertTrue(messages) 

2771 self.assertTrue(any("no-purpose" in message for message in messages)) 

2772 

2773 def testQueryDataIdsOrderBy(self): 

2774 """Test order_by and limit on result returned by queryDataIds().""" 

2775 registry = self.makeRegistry() 

2776 self.loadData(registry, "base.yaml") 

2777 self.loadData(registry, "datasets.yaml") 

2778 self.loadData(registry, "spatial.yaml") 

2779 

2780 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

2781 return registry.queryDataIds( 

2782 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

2783 ) 

2784 

2785 Test = namedtuple( 

2786 "testQueryDataIdsOrderByTest", 

2787 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

2788 defaults=(None, None, None), 

2789 ) 

2790 

2791 test_data = ( 

2792 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2793 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

2794 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

2795 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

2796 Test( 

2797 "tract.id,visit.id", 

2798 "tract,visit", 

2799 ((0, 1), (0, 1), (0, 2)), 

2800 limit=(3,), 

2801 ), 

2802 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

2803 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

2804 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

2805 Test( 

2806 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

2807 ), 

2808 Test( 

2809 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

2810 ), 

2811 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2812 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2813 Test( 

2814 "tract,-timespan.begin,timespan.end", 

2815 "tract,visit", 

2816 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

2817 ), 

2818 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

2819 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

2820 Test( 

2821 "tract,detector", 

2822 "tract,detector", 

2823 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2824 datasets="flat", 

2825 collections="imported_r", 

2826 ), 

2827 Test( 

2828 "tract,detector.full_name", 

2829 "tract,detector", 

2830 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2831 datasets="flat", 

2832 collections="imported_r", 

2833 ), 

2834 Test( 

2835 "tract,detector.raft,detector.name_in_raft", 

2836 "tract,detector", 

2837 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2838 datasets="flat", 

2839 collections="imported_r", 

2840 ), 

2841 ) 

2842 

2843 for test in test_data: 

2844 order_by = test.order_by.split(",") 

2845 keys = test.keys.split(",") 

2846 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

2847 if test.limit is not None: 

2848 query = query.limit(*test.limit) 

2849 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

2850 self.assertEqual(dataIds, test.result) 

2851 

2852 # and materialize 

2853 query = do_query(keys).order_by(*order_by) 

2854 if test.limit is not None: 

2855 query = query.limit(*test.limit) 

2856 with self.assertRaises(RelationalAlgebraError): 

2857 with query.materialize(): 

2858 pass 

2859 

2860 # errors in a name 

2861 for order_by in ("", "-"): 

2862 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2863 list(do_query().order_by(order_by)) 

2864 

2865 for order_by in ("undimension.name", "-undimension.name"): 

2866 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"): 

2867 list(do_query().order_by(order_by)) 

2868 

2869 for order_by in ("attract", "-attract"): 

2870 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

2871 list(do_query().order_by(order_by)) 

2872 

2873 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

2874 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

2875 

2876 with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimesion"): 

2877 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

2878 

2879 with self.assertRaisesRegex( 

2880 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

2881 ): 

2882 list(do_query("tract").order_by("timespan.begin")) 

2883 

2884 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

2885 list(do_query("tract").order_by("tract.timespan.begin")) 

2886 

2887 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

2888 list(do_query("tract").order_by("tract.name")) 

2889 

2890 def testQueryDataIdsGovernorExceptions(self): 

2891 """Test exceptions raised by queryDataIds() for incorrect governors.""" 

2892 registry = self.makeRegistry() 

2893 self.loadData(registry, "base.yaml") 

2894 self.loadData(registry, "datasets.yaml") 

2895 self.loadData(registry, "spatial.yaml") 

2896 

2897 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs): 

2898 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs) 

2899 

2900 Test = namedtuple( 

2901 "testQueryDataIdExceptionsTest", 

2902 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"), 

2903 defaults=(None, None, None, {}, None, 0), 

2904 ) 

2905 

2906 test_data = ( 

2907 Test("tract,visit", count=6), 

2908 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2909 Test( 

2910 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError 

2911 ), 

2912 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2913 Test( 

2914 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError 

2915 ), 

2916 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6), 

2917 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError), 

2918 Test( 

2919 "tract,visit", 

2920 where="instrument=cam AND skymap=map", 

2921 bind={"cam": "Cam1", "map": "SkyMap1"}, 

2922 count=6, 

2923 ), 

2924 Test( 

2925 "tract,visit", 

2926 where="instrument=cam AND skymap=map", 

2927 bind={"cam": "Cam", "map": "SkyMap"}, 

2928 exception=DataIdValueError, 

2929 ), 

2930 ) 

2931 

2932 for test in test_data: 

2933 dimensions = test.dimensions.split(",") 

2934 if test.exception: 

2935 with self.assertRaises(test.exception): 

2936 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() 

2937 else: 

2938 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2939 self.assertEqual(query.count(discard=True), test.count) 

2940 

2941 # and materialize 

2942 if test.exception: 

2943 with self.assertRaises(test.exception): 

2944 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2945 with query.materialize() as materialized: 

2946 materialized.count(discard=True) 

2947 else: 

2948 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2949 with query.materialize() as materialized: 

2950 self.assertEqual(materialized.count(discard=True), test.count) 
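
    # Illustrative sketch: governor dimension values (instrument, skymap)
    # are validated eagerly, so an unknown value raises DataIdValueError
    # rather than silently yielding an empty result. "Cam2" is an instrument
    # name absent from the test data above.
    @staticmethod
    def _example_governor_validation(registry: Registry) -> bool:
        try:
            registry.queryDataIds(
                ["tract", "visit"], instrument="Cam2", skymap="SkyMap1"
            ).count()
        except DataIdValueError:
            return True  # the unknown governor value was rejected
        return False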

2951 

2952 def testQueryDimensionRecordsOrderBy(self): 

2953 """Test order_by and limit on result returned by 

2954 queryDimensionRecords(). 

2955 """ 

2956 registry = self.makeRegistry() 

2957 self.loadData(registry, "base.yaml") 

2958 self.loadData(registry, "datasets.yaml") 

2959 self.loadData(registry, "spatial.yaml") 

2960 

2961 def do_query(element, datasets=None, collections=None): 

2962 return registry.queryDimensionRecords( 

2963 element, instrument="Cam1", datasets=datasets, collections=collections 

2964 ) 

2965 

2966 query = do_query("detector") 

2967 self.assertEqual(len(list(query)), 4) 

2968 

2969 Test = namedtuple( 

2970 "testQueryDataIdsOrderByTest", 

2971 ("element", "order_by", "result", "limit", "datasets", "collections"), 

2972 defaults=(None, None, None), 

2973 ) 

2974 

2975 test_data = ( 

2976 Test("detector", "detector", (1, 2, 3, 4)), 

2977 Test("detector", "-detector", (4, 3, 2, 1)), 

2978 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)), 

2979 Test("detector", "-detector.purpose", (4,), limit=(1,)), 

2980 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)), 

2981 Test("visit", "visit", (1, 2)), 

2982 Test("visit", "-visit.id", (2, 1)), 

2983 Test("visit", "zenith_angle", (1, 2)), 

2984 Test("visit", "-visit.name", (2, 1)), 

2985 Test("visit", "day_obs,-timespan.begin", (2, 1)), 

2986 ) 

2987 

2988 for test in test_data: 

2989 order_by = test.order_by.split(",") 

2990 query = do_query(test.element).order_by(*order_by) 

2991 if test.limit is not None: 

2992 query = query.limit(*test.limit) 

2993 dataIds = tuple(rec.id for rec in query) 

2994 self.assertEqual(dataIds, test.result) 

2995 

2996 # errors in a name 

2997 for order_by in ("", "-"): 

2998 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2999 list(do_query("detector").order_by(order_by)) 

3000 

3001 for order_by in ("undimension.name", "-undimension.name"): 

3002 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"): 

3003 list(do_query("detector").order_by(order_by)) 

3004 

3005 for order_by in ("attract", "-attract"): 

3006 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."): 

3007 list(do_query("detector").order_by(order_by)) 

3008 

3009 def testQueryDimensionRecordsExceptions(self): 

3010 """Test exceptions raised by queryDimensionRecords().""" 

3011 registry = self.makeRegistry() 

3012 self.loadData(registry, "base.yaml") 

3013 self.loadData(registry, "datasets.yaml") 

3014 self.loadData(registry, "spatial.yaml") 

3015 

3016 result = registry.queryDimensionRecords("detector") 

3017 self.assertEqual(result.count(), 4) 

3018 result = registry.queryDimensionRecords("detector", instrument="Cam1") 

3019 self.assertEqual(result.count(), 4) 

3020 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"}) 

3021 self.assertEqual(result.count(), 4) 

3022 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'") 

3023 self.assertEqual(result.count(), 4) 

3024 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"}) 

3025 self.assertEqual(result.count(), 4) 

3026 

3027 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3028 result = registry.queryDimensionRecords("detector", instrument="NotCam1") 

3029 result.count() 

3030 

3031 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3032 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) 

3033 result.count() 

3034 

3035 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3036 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") 

3037 result.count() 

3038 

3039 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3040 result = registry.queryDimensionRecords( 

3041 "detector", where="instrument=instr", bind={"instr": "NotCam1"} 

3042 ) 

3043 result.count() 

3044 

3045 def testDatasetConstrainedDimensionRecordQueries(self): 

3046 """Test that queryDimensionRecords works even when given a dataset 

3047 constraint whose dimensions extend beyond the requested dimension 

3048 element's. 

3049 """ 

3050 registry = self.makeRegistry() 

3051 self.loadData(registry, "base.yaml") 

3052 self.loadData(registry, "datasets.yaml") 

3053 # Query for physical_filter dimension records, using a dataset type

3054 # whose dimensions extend beyond physical_filter's (it also has detector).

3055 records = registry.queryDimensionRecords( 

3056 "physical_filter", 

3057 datasets=["flat"], 

3058 collections="imported_r", 

3059 ) 

3060 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"}) 

3061 # Trying to constrain by all dataset types is an error. 

3062 with self.assertRaises(TypeError): 

3063 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r")) 

3064 

3065 def testSkyPixDatasetQueries(self): 

3066 """Test that we can build queries involving skypix dimensions as long 

3067 as a dataset type that uses those dimensions is included. 

3068 """ 

3069 registry = self.makeRegistry() 

3070 self.loadData(registry, "base.yaml") 

3071 dataset_type = DatasetType( 

3072 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int" 

3073 ) 

3074 registry.registerDatasetType(dataset_type) 

3075 run = "r" 

3076 registry.registerRun(run) 

3077 # First try queries where there are no datasets; the concern is whether 

3078 # we can even build and execute these queries without raising, even 

3079 # when "doomed" query shortcuts are in play. 

3080 self.assertFalse( 

3081 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)) 

3082 ) 

3083 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run))) 

3084 # Now add a dataset and see that we can get it back. 

3085 htm7 = registry.dimensions.skypix["htm"][7].pixelization 

3086 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0]) 

3087 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run) 

3088 self.assertEqual( 

3089 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)), 

3090 {data_id}, 

3091 ) 

3092 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref}) 
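
    # Illustrative sketch: a skypix pixelization exposes its full index range
    # as a RangeSet of half-open (begin, end) intervals, which is how the
    # first valid htm7 index is obtained above.
    @staticmethod
    def _example_first_htm7_index(registry: Registry) -> int:
        pixelization = registry.dimensions.skypix["htm"][7].pixelization
        begin, _end = next(iter(pixelization.universe()))
        return begin  # the smallest valid htm7 pixel index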

3093 

3094 def testDatasetIdFactory(self): 

3095 """Simple test for DatasetIdFactory, mostly to catch potential changes 

3096 in its API. 

3097 """ 

3098 registry = self.makeRegistry() 

3099 factory = registry.datasetIdFactory 

3100 dataset_type = DatasetType( 

3101 "datasetType", 

3102 dimensions=["detector", "instrument"], 

3103 universe=registry.dimensions, 

3104 storageClass="int", 

3105 ) 

3106 run = "run" 

3107 data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions) 

3108 

3109 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE) 

3110 self.assertIsInstance(datasetId, uuid.UUID) 

3111 self.assertEqual(datasetId.version, 4) 

3112 

3113 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE) 

3114 self.assertIsInstance(datasetId, uuid.UUID) 

3115 self.assertEqual(datasetId.version, 5) 

3116 

3117 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN) 

3118 self.assertIsInstance(datasetId, uuid.UUID) 

3119 self.assertEqual(datasetId.version, 5) 
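
    # Illustrative note on the UUID versions asserted above: UNIQUE mode
    # produces a random (version 4) UUID, while the DATAID_TYPE* modes
    # produce deterministic name-based (version 5) UUIDs, so re-ingesting
    # the same data ID reproduces the same dataset ID. A standard-library
    # analogue of that determinism:
    @staticmethod
    def _example_uuid_versions() -> None:
        name = "instrument=Cam1,detector=1"  # stand-in for a serialized data ID
        assert uuid.uuid4().version == 4
        assert uuid.uuid5(uuid.NAMESPACE_URL, name).version == 5
        # uuid5 is deterministic for the same namespace and name.
        assert uuid.uuid5(uuid.NAMESPACE_URL, name) == uuid.uuid5(uuid.NAMESPACE_URL, name)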

3120 

3121 def testExposureQueries(self): 

3122 """Test query methods using arguments sourced from the exposure log 

3123 service. 

3124 

3125 The most complete test dataset currently available to daf_butler tests

3126 is the hsc-rc2-subset.yaml export (which is unfortunately distinct from

3127 the lsst/rc2_subset GitHub repo), but that does not have 'exposure'

3128 dimension records as it was focused on providing nontrivial spatial 

3129 overlaps between visit+detector and tract+patch. So in this test we 

3130 need to translate queries that originally used the exposure dimension 

3131 to use the (very similar) visit dimension instead. 

3132 """ 

3133 registry = self.makeRegistry() 

3134 self.loadData(registry, "hsc-rc2-subset.yaml") 

3135 self.assertEqual( 

3136 [ 

3137 record.id 

3138 for record in registry.queryDimensionRecords("visit", instrument="HSC") 

3139 .order_by("id") 

3140 .limit(5) 

3141 ], 

3142 [318, 322, 326, 330, 332], 

3143 ) 

3144 self.assertEqual( 

3145 [ 

3146 data_id["visit"] 

3147 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("id").limit(5) 

3148 ], 

3149 [318, 322, 326, 330, 332], 

3150 ) 

3151 self.assertEqual( 

3152 [ 

3153 record.id 

3154 for record in registry.queryDimensionRecords("detector", instrument="HSC") 

3155 .order_by("full_name") 

3156 .limit(5) 

3157 ], 

3158 [73, 72, 71, 70, 65], 

3159 ) 

3160 self.assertEqual( 

3161 [ 

3162 data_id["detector"] 

3163 for data_id in registry.queryDataIds(["detector"], instrument="HSC") 

3164 .order_by("full_name") 

3165 .limit(5) 

3166 ], 

3167 [73, 72, 71, 70, 65], 

3168 ) 

3169 

3170 def test_long_query_names(self) -> None: 

3171 """Test that queries involving very long names are handled correctly. 

3172 

3173 This is especially important for PostgreSQL, which truncates symbols 

3174 longer than 63 characters, but it's worth testing for all DBs.

3175 """ 

3176 registry = self.makeRegistry() 

3177 name = "abcd" * 17 

3178 registry.registerDatasetType( 

3179 DatasetType( 

3180 name, 

3181 dimensions=(), 

3182 storageClass="Exposure", 

3183 universe=registry.dimensions, 

3184 ) 

3185 ) 

3186 # Need to search more than one collection actually containing a

3187 # matching dataset; otherwise findFirst=True becomes a no-op and the

3188 # query optimizer could sidestep any truncation bugs.

3189 run1 = "run1" 

3190 registry.registerRun(run1) 

3191 run2 = "run2" 

3192 registry.registerRun(run2) 

3193 (ref1,) = registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run1) 

3194 registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run2) 

3195 self.assertEqual( 

3196 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)), 

3197 {ref1}, 

3198 ) 
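
    # Illustrative note: the name built above is len("abcd" * 17) == 68
    # characters, which exceeds PostgreSQL's default 63-character identifier
    # limit (NAMEDATALEN - 1), so unmitigated truncation could make two
    # distinct symbols collide.
    @staticmethod
    def _example_name_length() -> None:
        assert len("abcd" * 17) == 68  # longer than PostgreSQL's 63-char limit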

3199 

3200 def test_skypix_constraint_queries(self) -> None: 

3201 """Test queries spatially constrained by a skypix data ID.""" 

3202 registry = self.makeRegistry() 

3203 self.loadData(registry, "hsc-rc2-subset.yaml") 

3204 patch_regions = { 

3205 (data_id["tract"], data_id["patch"]): data_id.region 

3206 for data_id in registry.queryDataIds(["patch"]).expanded() 

3207 } 

3208 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"] 

3209 # This check ensures the test doesn't become trivial due to a config 

3210 # change; if it does, just pick a different HTM level.

3211 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix) 

3212 # Gather all skypix IDs that definitely overlap at least one of these 

3213 # patches. 

3214 relevant_skypix_ids = lsst.sphgeom.RangeSet() 

3215 for patch_region in patch_regions.values(): 

3216 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region) 

3217 # Look for a "nontrivial" skypix_id that overlaps at least one patch 

3218 # and does not overlap at least one other patch. 

3219 for skypix_id in itertools.chain.from_iterable( 

3220 range(begin, end) for begin, end in relevant_skypix_ids 

3221 ): 

3222 skypix_region = skypix_dimension.pixelization.pixel(skypix_id) 

3223 overlapping_patches = { 

3224 patch_key 

3225 for patch_key, patch_region in patch_regions.items() 

3226 if not patch_region.isDisjointFrom(skypix_region) 

3227 } 

3228 if overlapping_patches and overlapping_patches != patch_regions.keys(): 

3229 break 

3230 else: 

3231 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.") 

3232 self.assertEqual( 

3233 { 

3234 (data_id["tract"], data_id["patch"]) 

3235 for data_id in registry.queryDataIds( 

3236 ["patch"], 

3237 dataId={skypix_dimension.name: skypix_id}, 

3238 ) 

3239 }, 

3240 overlapping_patches, 

3241 ) 

3242 

3243 def test_spatial_constraint_queries(self) -> None: 

3244 """Test queries in which one spatial dimension in the constraint (data 

3245 ID or ``where`` string) constrains a different spatial dimension in the 

3246 query result columns. 

3247 """ 

3248 registry = self.makeRegistry() 

3249 self.loadData(registry, "hsc-rc2-subset.yaml") 

3250 patch_regions = { 

3251 (data_id["tract"], data_id["patch"]): data_id.region 

3252 for data_id in registry.queryDataIds(["patch"]).expanded() 

3253 } 

3254 observation_regions = { 

3255 (data_id["visit"], data_id["detector"]): data_id.region 

3256 for data_id in registry.queryDataIds(["visit", "detector"]).expanded() 

3257 } 

3258 all_combos = { 

3259 (patch_key, observation_key) 

3260 for patch_key, observation_key in itertools.product(patch_regions, observation_regions) 

3261 } 

3262 overlapping_combos = { 

3263 (patch_key, observation_key) 

3264 for patch_key, observation_key in all_combos 

3265 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key]) 

3266 } 

3267 # Check a direct spatial join with no constraint first. 

3268 self.assertEqual( 

3269 { 

3270 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"])) 

3271 for data_id in registry.queryDataIds(["patch", "visit", "detector"]) 

3272 }, 

3273 overlapping_combos, 

3274 ) 

3275 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set)

3276 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set)

3277 for patch_key, observation_key in overlapping_combos: 

3278 overlaps_by_patch[patch_key].add(observation_key) 

3279 overlaps_by_observation[observation_key].add(patch_key) 

3280 # Find patches and observations that each overlap at least one of the

3281 # other kind but not all of them.

3282 nontrivial_patch = next( 

3283 iter( 

3284 patch_key 

3285 for patch_key, observation_keys in overlaps_by_patch.items() 

3286 if observation_keys and observation_keys != observation_regions.keys() 

3287 ) 

3288 ) 

3289 nontrivial_observation = next( 

3290 iter( 

3291 observation_key 

3292 for observation_key, patch_keys in overlaps_by_observation.items() 

3293 if patch_keys and patch_keys != patch_regions.keys() 

3294 ) 

3295 ) 

3296 # Use the nontrivial patches and observations as constraints on the 

3297 # other dimensions in various ways, first via a 'where' expression. 

3298 # It's better in general to use 'bind' instead of f-strings, but these

3299 # are all integers so there are no quoting concerns.

3300 self.assertEqual( 

3301 { 

3302 (data_id["visit"], data_id["detector"]) 

3303 for data_id in registry.queryDataIds( 

3304 ["visit", "detector"], 

3305 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}", 

3306 skymap="hsc_rings_v1", 

3307 ) 

3308 }, 

3309 overlaps_by_patch[nontrivial_patch], 

3310 ) 

3311 self.assertEqual( 

3312 { 

3313 (data_id["tract"], data_id["patch"]) 

3314 for data_id in registry.queryDataIds( 

3315 ["patch"], 

3316 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}", 

3317 instrument="HSC", 

3318 ) 

3319 }, 

3320 overlaps_by_observation[nontrivial_observation], 

3321 ) 

3322 # and then via the dataId argument. 

3323 self.assertEqual( 

3324 { 

3325 (data_id["visit"], data_id["detector"]) 

3326 for data_id in registry.queryDataIds( 

3327 ["visit", "detector"], 

3328 dataId={ 

3329 "tract": nontrivial_patch[0], 

3330 "patch": nontrivial_patch[1], 

3331 }, 

3332 skymap="hsc_rings_v1", 

3333 ) 

3334 }, 

3335 overlaps_by_patch[nontrivial_patch], 

3336 ) 

3337 self.assertEqual( 

3338 { 

3339 (data_id["tract"], data_id["patch"]) 

3340 for data_id in registry.queryDataIds( 

3341 ["patch"], 

3342 dataId={ 

3343 "visit": nontrivial_observation[0], 

3344 "detector": nontrivial_observation[1], 

3345 }, 

3346 instrument="HSC", 

3347 ) 

3348 }, 

3349 overlaps_by_observation[nontrivial_observation], 

3350 )