# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from ... import ddl

__all__ = ["RegistryTests"]

import datetime
import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Iterator
from datetime import timedelta
from typing import TYPE_CHECKING

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ..._dataset_association import DatasetAssociation
from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef
from ..._dataset_type import DatasetType
from ..._named import NamedValueSet
from ..._storage_class import StorageClass
from ..._timespan import Timespan
from ...dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension
from .._collection_summary import CollectionSummary
from .._collection_type import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from ..sql_registry import SqlRegistry


class RegistryTests(ABC):
    """Generic tests for the `SqlRegistry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: str | None = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: str | dict[str, str] | None = None
    """Name or configuration dictionary of the datasets manager class. If a
    subclass provides a value for this member, it overrides the name
    specified in the default configuration (`str` or `dict`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()
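
    # A concrete subclass might implement `getDataDir` along these lines (an
    # illustrative sketch only; the relative path below is hypothetical and
    # depends on where a given test package keeps its YAML files):
    #
    #     @classmethod
    #     def getDataDir(cls) -> str:
    #         return os.path.normpath(os.path.join(os.path.dirname(__file__), "data"))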

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        Returned instance will be pre-configured based on the values of class
        members, and default-configured for all other parameters. Subclasses
        that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config
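
    # As an illustrative sketch (no such subclass is defined here), a
    # configuration variant could override the manager class members,
    # assuming the manager path below is valid for the daf_butler version
    # under test:
    #
    #     class SynthIntKeyRegistryTests(RegistryTests, unittest.TestCase):
    #         collectionsManager = (
    #             "lsst.daf.butler.registry.collections.synthIntKey.SynthIntKeyCollectionManager"
    #         )
    #
    # makeRegistryConfig() then folds that value into the config it returns.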

    @abstractmethod
    def makeRegistry(self, share_repo_with: SqlRegistry | None = None) -> SqlRegistry | None:
        """Return the SqlRegistry instance to be tested.

        Parameters
        ----------
        share_repo_with : `SqlRegistry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `SqlRegistry`
            New `SqlRegistry` instance, or `None` *only* if `share_repo_with`
            is not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()

    def loadData(self, registry: SqlRegistry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
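
    # Note that because the results objects are lazy, list(results),
    # results.count(), and results.any() can each take a different execution
    # path, which is why the helper above exercises all three against the
    # same expected values.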

    def testOpaque(self):
        """Tests for `SqlRegistry.registerOpaqueTable`,
        `SqlRegistry.insertOpaqueData`, `SqlRegistry.fetchOpaqueData`, and
        `SqlRegistry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test very long IN clause which exceeds sqlite limit on number of
        # parameters. SQLite says the limit is 32k but it looks like it is
        # much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than 1k batch size, first with
        # duplicates, second has matching elements in different batches (after
        # sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `SqlRegistry.registerDatasetType` and
        `SqlRegistry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        differentDimensions = registry.dimensions.conform(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `SqlRegistry.insertDimensionData`,
        `SqlRegistry.syncDimensionData`, and `SqlRegistry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", dimensions=dimension.minimal_group)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, dimensions=dimension.minimal_group)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i", dimensions=dimension2.minimal_group
            )
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `SqlRegistry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `SqlRegistry.insertDatasets`,
        `SqlRegistry.getDataset`, and `SqlRegistry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `SqlRegistry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, of which two have the right
        # dataset type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets-uuid.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that SqlRegistry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that SqlRegistry.removeDatasetType raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `SqlRegistry._importDatasets` with UUID dataset ID."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing same DatasetRef with different dataset ID is an error
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs, they can be re-imported multiple times.
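        # (Background, as an illustrative sketch rather than the exact
        # algorithm: DATAID_TYPE derives a deterministic version-5 UUID from
        # the dataset type and data ID, while DATAID_TYPE_RUN also mixes in
        # the run name, so only the latter can be imported into more than one
        # run. Conceptually, with a hypothetical namespace NS, something like
        #     uuid.uuid5(NS, f"{datasetType.name}/{dataId}/{run}")
        # is computed; the real logic lives in DatasetIdFactory.)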
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make dataset ref with reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
                self.assertEqual(ref1.id, ref.id)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref])

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be included
        # when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "flat", "bias.wcs", "flat.photoCalib"},
                NamedValueSet(registry.queryDatasetTypes(components=True)).names,
            )
        # Use a pattern that can match either parent or components. Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
            )
        # This pattern matches only a component. In this case we also return
        # that component dataset type if components=None.
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=None)).names,
            )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
            )
        # Add a dataset type using a StorageClass that we'll then remove; check
        # that this does not affect our ability to query for dataset types
        # (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={
                "data1": registry.storageClasses.getStorageClass("StructuredDataDict"),
                "data2": registry.storageClasses.getStorageClass("StructuredDataDict"),
            },
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertWarns(FutureWarning):
            with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
                everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that. So if the next line
        # fails (i.e. "temporary.data" _is_ in everything.names), it means
        # this part of the test isn't doing anything, because the _unregister
        # call above isn't simulating the real-life case we want it to
        # simulate, in which different versions of daf_butler in entirely
        # different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp". This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        with self.assertWarns(FutureWarning):
            dataIds = registry.queryDataIds(
                ["detector"],
                datasets=["bias.wcs"],
                collections=collection,
            ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(
                        instrument="Cam1", detector=d, dimensions=parentType.dimensions
                    )
                    for d in (1, 2, 3)
                },
                dimensions=parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        with self.assertWarns(FutureWarning):
            childRefs2 = set(
                registry.queryDatasets(
                    "bias.wcs",
                    collections=collection,
                )
            )
        self.assertEqual({ref.datasetType for ref in childRefs2}, {childType})
        self.assertEqual({ref.dataId for ref in childRefs2}, set(dataIds))

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # Search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2. That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that `SqlRegistry.setCollectionChain` obeys its 'flatten'
        option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, dimensions=dimension.minimal_group))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, dimensions=dimension.minimal_group)
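
    # (The savepoint=True argument above is what confines the failure to the
    # inner block: the nested context rolls back to a database savepoint
    # instead of aborting the outer transaction, which is why the first
    # insert survives.)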

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap.
        """
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r"),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit=10),
            dict(instrument="DummyCam", exposure=101, visit=10),
            dict(instrument="DummyCam", exposure=110, visit=11),
            dict(instrument="DummyCam", exposure=111, visit=11),
            dict(instrument="DummyCam", exposure=200, visit=20),
            dict(instrument="DummyCam", exposure=201, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.conform(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.conform(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = registry.dimensions.conform(
            rawType.dimensions.required.names | calexpType.dimensions.required.names
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter; it is not in the dimensions, but it
        # is a part of the full expression so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test, we want
        # "band" in the test so also have to add physical_filter
        # dimensions
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!"))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = registry.dimensions.conform(
            calexpType.dimensions.required.names
            | mergeType.dimensions.required.names
            | measType.dimensions.required.names
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("skymap", "tract", "patch", "band"))
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
1229 self.assertEqual(len(rows), 3 * 4 * 1) # 3 tracts x 4 patches x 1 filter

1230 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5)) 

1231 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7)) 

1232 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",)) 

1233 

1234 # Specifying a non-existent skymap raises an exception.

1235 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

1236 rows = registry.queryDataIds( 

1237 dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'" 

1238 ).toSet() 

1239 
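# A registry-free sketch of what the string ``where`` filter above selects:
# ``tract IN (1, 5) AND patch IN (2, 7)`` keeps exactly the cross-product
# rows whose tract and patch fall in those sets. The literal values mirror
# the dimension data inserted earlier in this test; plain dicts stand in for
# data IDs here, so this is an illustration rather than part of the suite.
import itertools

all_rows = [
    {"tract": tract, "patch": patch, "band": band}
    for tract, patch, band in itertools.product((1, 3, 5), (2, 4, 6, 7), ("i", "r"))
]
kept = [row for row in all_rows if row["tract"] in (1, 5) and row["patch"] in (2, 7)]
assert len(kept) == 2 * 2 * 2  # matches the 2 tracts x 2 patches x 2 filters assertion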

1240 def testSpatialJoin(self): 

1241 """Test queries that involve spatial overlap joins.""" 

1242 registry = self.makeRegistry() 

1243 self.loadData(registry, "hsc-rc2-subset.yaml") 

1244 

1245 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of 

1246 # the TopologicalFamily they belong to. We'll relate all elements in 

1247 # each family to all of the elements in each other family. 

1248 families = defaultdict(set) 

1249 # Dictionary of {element.name: {dataId: region}}. 

1250 regions = {} 

1251 for element in registry.dimensions.database_elements: 

1252 if element.spatial is not None: 

1253 families[element.spatial.name].add(element) 

1254 regions[element.name] = { 

1255 record.dataId: record.region for record in registry.queryDimensionRecords(element) 

1256 } 

1257 

1258 # If this check fails, it's not necessarily a problem - it may just be 

1259 # a reasonable change to the default dimension definitions - but the 

1260 # test below depends on there being more than one family to do anything 

1261 # useful. 

1262 self.assertEqual(len(families), 2) 

1263 

1264 # Overlap DatabaseDimensionElements with each other. 

1265 for family1, family2 in itertools.combinations(families, 2): 

1266 for element1, element2 in itertools.product(families[family1], families[family2]): 

1267 dimensions = element1.minimal_group | element2.minimal_group 

1268 # Construct expected set of overlapping data IDs via a 

1269 # brute-force comparison of the regions we've already fetched. 

1270 expected = { 

1271 DataCoordinate.standardize( 

1272 {**dataId1.required, **dataId2.required}, dimensions=dimensions 

1273 ) 

1274 for (dataId1, region1), (dataId2, region2) in itertools.product( 

1275 regions[element1.name].items(), regions[element2.name].items() 

1276 ) 

1277 if not region1.isDisjointFrom(region2) 

1278 } 

1279 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1280 queried = set(registry.queryDataIds(dimensions)) 

1281 self.assertEqual(expected, queried) 

1282 

1283 # Overlap each DatabaseDimensionElement with the commonSkyPix system. 

1284 commonSkyPix = registry.dimensions.commonSkyPix 

1285 for elementName, these_regions in regions.items(): 

1286 dimensions = registry.dimensions[elementName].minimal_group | commonSkyPix.minimal_group 

1287 expected = set() 

1288 for dataId, region in these_regions.items(): 

1289 for begin, end in commonSkyPix.pixelization.envelope(region): 

1290 expected.update( 

1291 DataCoordinate.standardize( 

1292 {commonSkyPix.name: index, **dataId.required}, dimensions=dimensions 

1293 ) 

1294 for index in range(begin, end) 

1295 ) 

1296 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1297 queried = set(registry.queryDataIds(dimensions)) 

1298 self.assertEqual(expected, queried) 

1299 
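# A registry-free sketch of the brute-force overlap comparison used above,
# assuming only that lsst.sphgeom is importable (it is imported at module
# scope): two regions overlap exactly when neither is disjoint from the
# other, and a parent trixel always overlaps (indeed contains) its children.
# The index 8192 is simply the first valid level-5 HTM index.
import lsst.sphgeom

htm5 = lsst.sphgeom.HtmPixelization(5)
htm6 = lsst.sphgeom.HtmPixelization(6)
parent = htm5.triangle(8192)     # an arbitrary level-5 trixel
child = htm6.triangle(8192 * 4)  # its first level-6 child
assert parent.contains(child)
assert not parent.isDisjointFrom(child)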

1300 def testAbstractQuery(self): 

1301 """Test that we can run a query that just lists the known 

1302 bands. This is tricky because band is 

1303 backed by a query against physical_filter. 

1304 """ 

1305 registry = self.makeRegistry() 

1306 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1307 registry.insertDimensionData( 

1308 "physical_filter", 

1309 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1310 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1311 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1312 ) 

1313 rows = registry.queryDataIds(["band"]).toSet() 

1314 self.assertCountEqual( 

1315 rows, 

1316 [ 

1317 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1318 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1319 ], 

1320 ) 

1321 
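# What "band is backed by a query against physical_filter" means in
# practice: a sketch that deduplicates the band values out of the
# physical_filter rows inserted above (plain dicts standing in for
# dimension records).
physical_filters = [
    {"instrument": "DummyCam", "name": "dummy_i", "band": "i"},
    {"instrument": "DummyCam", "name": "dummy_i2", "band": "i"},
    {"instrument": "DummyCam", "name": "dummy_r", "band": "r"},
]
bands = sorted({row["band"] for row in physical_filters})
assert bands == ["i", "r"]  # two distinct bands, matching the test assertion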

1322 def testAttributeManager(self): 

1323 """Test basic functionality of attribute manager.""" 

1324 # Number of attributes with schema versions in a fresh database:

1325 # 6 managers with 2 records per manager, plus one dimensions config record.

1326 VERSION_COUNT = 6 * 2 + 1 

1327 

1328 registry = self.makeRegistry() 

1329 attributes = registry._managers.attributes 

1330 

1331 # check what get() returns for non-existing key 

1332 self.assertIsNone(attributes.get("attr")) 

1333 self.assertEqual(attributes.get("attr", ""), "") 

1334 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1335 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1336 

1337 # cannot store empty key or value 

1338 with self.assertRaises(ValueError): 

1339 attributes.set("", "value") 

1340 with self.assertRaises(ValueError): 

1341 attributes.set("attr", "") 

1342 

1343 # set value of non-existing key 

1344 attributes.set("attr", "value") 

1345 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1346 self.assertEqual(attributes.get("attr"), "value") 

1347 

1348 # update value of existing key 

1349 with self.assertRaises(ButlerAttributeExistsError): 

1350 attributes.set("attr", "value2") 

1351 

1352 attributes.set("attr", "value2", force=True) 

1353 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1354 self.assertEqual(attributes.get("attr"), "value2") 

1355 

1356 # delete existing key 

1357 self.assertTrue(attributes.delete("attr")) 

1358 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1359 

1360 # delete non-existing key 

1361 self.assertFalse(attributes.delete("non-attr")) 

1362 

1363 # store bunch of keys and get the list back 

1364 data = [ 

1365 ("version.core", "1.2.3"), 

1366 ("version.dimensions", "3.2.1"), 

1367 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1368 ] 

1369 for key, value in data: 

1370 attributes.set(key, value) 

1371 items = dict(attributes.items()) 

1372 for key, value in data: 

1373 self.assertEqual(items[key], value) 

1374 
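# A dict-backed sketch of the attribute-manager contract exercised above:
# set() refuses to overwrite an existing key unless force=True (the real
# manager raises ButlerAttributeExistsError; RuntimeError stands in here),
# empty keys or values are rejected, and delete() reports whether the key
# existed. This is a stand-in, not the real implementation.
class _Attributes:
    def __init__(self) -> None:
        self._data: dict[str, str] = {}

    def set(self, key: str, value: str, *, force: bool = False) -> None:
        if not key or not value:
            raise ValueError("empty keys and values are not allowed")
        if key in self._data and not force:
            raise RuntimeError(f"attribute {key!r} already exists")
        self._data[key] = value

    def delete(self, key: str) -> bool:
        return self._data.pop(key, None) is not None

attrs = _Attributes()
attrs.set("attr", "value")
attrs.set("attr", "value2", force=True)  # force=True allows the update
assert attrs.delete("attr") and not attrs.delete("attr")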

1375 def testQueryDatasetsDeduplication(self): 

1376 """Test that the findFirst option to queryDatasets selects datasets 

1377 from collections in the order given". 

1378 """ 

1379 registry = self.makeRegistry() 

1380 self.loadData(registry, "base.yaml") 

1381 self.loadData(registry, "datasets.yaml") 

1382 self.assertCountEqual( 

1383 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1384 [ 

1385 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1386 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1387 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1388 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1389 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1390 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1391 ], 

1392 ) 

1393 self.assertCountEqual( 

1394 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1395 [ 

1396 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1397 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1398 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1399 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1400 ], 

1401 ) 

1402 self.assertCountEqual( 

1403 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1404 [ 

1405 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1406 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1407 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1408 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1409 ], 

1410 ) 

1411 
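# A registry-free sketch of the findFirst rule verified above: for each data
# ID, keep the dataset from the earliest collection in the search order.
# Datasets are modeled as (detector, collection) pairs matching the contents
# of datasets.yaml as exercised by this test.
datasets = [
    (1, "imported_g"), (2, "imported_g"), (3, "imported_g"),
    (2, "imported_r"), (3, "imported_r"), (4, "imported_r"),
]

def find_first(datasets, collection_order):
    rank = {name: i for i, name in enumerate(collection_order)}
    best: dict[int, str] = {}
    for detector, collection in datasets:
        if detector not in best or rank[collection] < rank[best[detector]]:
            best[detector] = collection
    return sorted(best.items())

assert find_first(datasets, ["imported_g", "imported_r"]) == [
    (1, "imported_g"), (2, "imported_g"), (3, "imported_g"), (4, "imported_r"),
]
assert find_first(datasets, ["imported_r", "imported_g"]) == [
    (1, "imported_g"), (2, "imported_r"), (3, "imported_r"), (4, "imported_r"),
]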

1412 def testQueryResults(self): 

1413 """Test querying for data IDs and then manipulating the QueryResults 

1414 object returned to perform other queries. 

1415 """ 

1416 registry = self.makeRegistry() 

1417 self.loadData(registry, "base.yaml") 

1418 self.loadData(registry, "datasets.yaml") 

1419 bias = registry.getDatasetType("bias") 

1420 flat = registry.getDatasetType("flat") 

1421 # Obtain expected results from methods other than those we're testing 

1422 # here. That includes: 

1423 # - the dimensions of the data IDs we want to query: 

1424 expected_dimensions = registry.dimensions.conform(["detector", "physical_filter"]) 

1425 # - the dimensions of some other data IDs we'll extract from that: 

1426 expected_subset_dimensions = registry.dimensions.conform(["detector"]) 

1427 # - the data IDs we expect to obtain from the first queries: 

1428 expectedDataIds = DataCoordinateSet( 

1429 { 

1430 DataCoordinate.standardize( 

1431 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1432 ) 

1433 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1434 }, 

1435 dimensions=expected_dimensions, 

1436 hasFull=False, 

1437 hasRecords=False, 

1438 ) 

1439 # - the flat datasets we expect to find from those data IDs, in just 

1440 # one collection (so deduplication is irrelevant): 

1441 expectedFlats = [ 

1442 registry.findDataset( 

1443 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1444 ), 

1445 registry.findDataset( 

1446 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1447 ), 

1448 registry.findDataset( 

1449 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1450 ), 

1451 ] 

1452 # - the data IDs we expect to extract from that: 

1453 expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions) 

1454 # - the bias datasets we expect to find from those data IDs, after we 

1455 # subset-out the physical_filter dimension, both with duplicates: 

1456 expectedAllBiases = [ 

1457 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1458 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1459 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1460 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1461 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1462 ] 

1463 # - ...and without duplicates: 

1464 expectedDeduplicatedBiases = [ 

1465 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1466 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1467 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1468 ] 

1469 # Test against those expected results, using a "lazy" query for the 

1470 # data IDs (which re-executes that query each time we use it to do 

1471 # something new). 

1472 dataIds = registry.queryDataIds( 

1473 ["detector", "physical_filter"], 

1474 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1475 instrument="Cam1", 

1476 ) 

1477 self.assertEqual(dataIds.dimensions, expected_dimensions) 

1478 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1479 self.assertCountEqual( 

1480 list( 

1481 dataIds.findDatasets( 

1482 flat, 

1483 collections=["imported_r"], 

1484 ) 

1485 ), 

1486 expectedFlats, 

1487 ) 

1488 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True) 

1489 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1490 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1491 self.assertCountEqual( 

1492 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1493 expectedAllBiases, 

1494 ) 

1495 self.assertCountEqual( 

1496 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1497 expectedDeduplicatedBiases, 

1498 ) 

1499 

1500 # Searching for a dataset with dimensions we had projected away 

1501 # restores those dimensions. 

1502 self.assertCountEqual( 

1503 list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)), 

1504 expectedFlats, 

1505 ) 

1506 

1507 # Use a component dataset type. 

1508 self.assertCountEqual( 

1509 [ 

1510 ref.makeComponentRef("image") 

1511 for ref in subsetDataIds.findDatasets( 

1512 bias, 

1513 collections=["imported_r", "imported_g"], 

1514 findFirst=False, 

1515 ) 

1516 ], 

1517 [ref.makeComponentRef("image") for ref in expectedAllBiases], 

1518 ) 

1519 

1520 # Use a dataset type name and a dataset type object, neither of which

1521 # is registered.

1522 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure") 

1523 

1524 # Test both string name and dataset type object. 

1525 test_type: str | DatasetType 

1526 for test_type, test_type_name in ( 

1527 (unknown_type, unknown_type.name), 

1528 (unknown_type.name, unknown_type.name), 

1529 ): 

1530 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name): 

1531 list( 

1532 subsetDataIds.findDatasets( 

1533 test_type, collections=["imported_r", "imported_g"], findFirst=True 

1534 ) 

1535 ) 

1536 

1537 # Materialize the bias dataset queries (only) by putting the results 

1538 # into temporary tables, then repeat those tests. 

1539 with subsetDataIds.findDatasets( 

1540 bias, collections=["imported_r", "imported_g"], findFirst=False 

1541 ).materialize() as biases: 

1542 self.assertCountEqual(list(biases), expectedAllBiases) 

1543 with subsetDataIds.findDatasets( 

1544 bias, collections=["imported_r", "imported_g"], findFirst=True 

1545 ).materialize() as biases: 

1546 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1547 # Materialize the data ID subset query, but not the dataset queries. 

1548 with subsetDataIds.materialize() as subsetDataIds: 

1549 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1550 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1551 self.assertCountEqual( 

1552 list( 

1553 subsetDataIds.findDatasets( 

1554 bias, collections=["imported_r", "imported_g"], findFirst=False 

1555 ) 

1556 ), 

1557 expectedAllBiases, 

1558 ) 

1559 self.assertCountEqual( 

1560 list( 

1561 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1562 ), 

1563 expectedDeduplicatedBiases, 

1564 ) 

1565 # Materialize the dataset queries, too. 

1566 with subsetDataIds.findDatasets( 

1567 bias, collections=["imported_r", "imported_g"], findFirst=False 

1568 ).materialize() as biases: 

1569 self.assertCountEqual(list(biases), expectedAllBiases) 

1570 with subsetDataIds.findDatasets( 

1571 bias, collections=["imported_r", "imported_g"], findFirst=True 

1572 ).materialize() as biases: 

1573 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1574 # Materialize the original query, but none of the follow-up queries. 

1575 with dataIds.materialize() as dataIds: 

1576 self.assertEqual(dataIds.dimensions, expected_dimensions) 

1577 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1578 self.assertCountEqual( 

1579 list( 

1580 dataIds.findDatasets( 

1581 flat, 

1582 collections=["imported_r"], 

1583 ) 

1584 ), 

1585 expectedFlats, 

1586 ) 

1587 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True) 

1588 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1589 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1590 self.assertCountEqual( 

1591 list( 

1592 subsetDataIds.findDatasets( 

1593 bias, collections=["imported_r", "imported_g"], findFirst=False 

1594 ) 

1595 ), 

1596 expectedAllBiases, 

1597 ) 

1598 self.assertCountEqual( 

1599 list( 

1600 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1601 ), 

1602 expectedDeduplicatedBiases, 

1603 ) 

1604 # Materialize just the bias dataset queries. 

1605 with subsetDataIds.findDatasets( 

1606 bias, collections=["imported_r", "imported_g"], findFirst=False 

1607 ).materialize() as biases: 

1608 self.assertCountEqual(list(biases), expectedAllBiases) 

1609 with subsetDataIds.findDatasets( 

1610 bias, collections=["imported_r", "imported_g"], findFirst=True 

1611 ).materialize() as biases: 

1612 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1613 # Materialize the subset data ID query, but not the dataset 

1614 # queries. 

1615 with subsetDataIds.materialize() as subsetDataIds: 

1616 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1617 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1618 self.assertCountEqual( 

1619 list( 

1620 subsetDataIds.findDatasets( 

1621 bias, collections=["imported_r", "imported_g"], findFirst=False 

1622 ) 

1623 ), 

1624 expectedAllBiases, 

1625 ) 

1626 self.assertCountEqual( 

1627 list( 

1628 subsetDataIds.findDatasets( 

1629 bias, collections=["imported_r", "imported_g"], findFirst=True 

1630 ) 

1631 ), 

1632 expectedDeduplicatedBiases, 

1633 ) 

1634 # Materialize the bias dataset queries, too, so now we're 

1635 # materializing every single step. 

1636 with subsetDataIds.findDatasets( 

1637 bias, collections=["imported_r", "imported_g"], findFirst=False 

1638 ).materialize() as biases: 

1639 self.assertCountEqual(list(biases), expectedAllBiases) 

1640 with subsetDataIds.findDatasets( 

1641 bias, collections=["imported_r", "imported_g"], findFirst=True 

1642 ).materialize() as biases: 

1643 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1644 
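# A stdlib-only sketch of the materialize() pattern exercised above: the real
# method snapshots query results into a temporary table for the duration of a
# context; this stand-in caches rows in memory so that repeated iteration
# does not re-run the underlying "query".
from contextlib import contextmanager

@contextmanager
def materialize(run_query):
    rows = list(run_query())   # execute exactly once, up front
    yield lambda: list(rows)   # hand back a cheap, re-iterable view

call_count = 0

def run_query():
    global call_count
    call_count += 1
    return [1, 2, 3]

with materialize(run_query) as cached:
    assert cached() == [1, 2, 3]
    assert cached() == [1, 2, 3]
assert call_count == 1  # the query ran exactly once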

1645 def testStorageClassPropagation(self): 

1646 """Test that queries for datasets respect the storage class passed in 

1647 as part of a full dataset type. 

1648 """ 

1649 registry = self.makeRegistry() 

1650 self.loadData(registry, "base.yaml") 

1651 dataset_type_in_registry = DatasetType( 

1652 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions 

1653 ) 

1654 registry.registerDatasetType(dataset_type_in_registry) 

1655 run = "run1" 

1656 registry.registerRun(run) 

1657 (inserted_ref,) = registry.insertDatasets( 

1658 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run 

1659 ) 

1660 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry) 

1661 query_dataset_type = DatasetType( 

1662 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions 

1663 ) 

1664 self.assertNotEqual(dataset_type_in_registry, query_dataset_type) 

1665 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run]) 

1666 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore 

1667 (query_datasets_ref,) = query_datasets_result 

1668 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type) 

1669 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets( 

1670 query_dataset_type, collections=[run] 

1671 ) 

1672 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type) 

1673 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result 

1674 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type) 

1675 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type) 

1676 self.assertEqual(list(query_dataset_types_result), [query_dataset_type]) 

1677 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run]) 

1678 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type) 

1679 
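# A minimal sketch of the storage-class propagation contract checked above:
# two dataset types that differ only in storage class compare unequal, and
# every ref a query returns adopts the storage class the caller passed in.
# Frozen dataclasses stand in for the real DatasetType here.
from dataclasses import dataclass, replace

@dataclass(frozen=True)
class _DatasetType:
    name: str
    dimensions: tuple[str, ...]
    storageClass: str

registered = _DatasetType("tbl", ("instrument",), "Packages")
queried = replace(registered, storageClass="StructuredDataDict")
assert registered != queried
# Query code effectively re-labels each returned ref with the queried type:
assert replace(registered, storageClass=queried.storageClass) == queried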

1680 def testEmptyDimensionsQueries(self): 

1681 """Test Query and QueryResults objects in the case where there are no 

1682 dimensions. 

1683 """ 

1684 # Set up test data: one dataset type, two runs, one dataset in each. 

1685 registry = self.makeRegistry() 

1686 self.loadData(registry, "base.yaml") 

1687 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1688 registry.registerDatasetType(schema) 

1689 dataId = DataCoordinate.make_empty(registry.dimensions) 

1690 run1 = "run1" 

1691 run2 = "run2" 

1692 registry.registerRun(run1) 

1693 registry.registerRun(run2) 

1694 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1695 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1696 # Query directly for both datasets together, and then for each one individually.

1697 self.checkQueryResults( 

1698 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1699 ) 

1700 self.checkQueryResults( 

1701 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1702 [dataset1], 

1703 ) 

1704 self.checkQueryResults( 

1705 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1706 [dataset2], 

1707 ) 

1708 # Query for data IDs with no dimensions. 

1709 dataIds = registry.queryDataIds([]) 

1710 self.checkQueryResults(dataIds, [dataId]) 

1711 # Use queried data IDs to find the datasets. 

1712 self.checkQueryResults( 

1713 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1714 [dataset1, dataset2], 

1715 ) 

1716 self.checkQueryResults( 

1717 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1718 [dataset1], 

1719 ) 

1720 self.checkQueryResults( 

1721 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1722 [dataset2], 

1723 ) 

1724 # Now materialize the data ID query results and repeat those tests. 

1725 with dataIds.materialize() as dataIds: 

1726 self.checkQueryResults(dataIds, [dataId]) 

1727 self.checkQueryResults( 

1728 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1729 [dataset1], 

1730 ) 

1731 self.checkQueryResults( 

1732 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1733 [dataset2], 

1734 ) 

1735 # Query for non-empty data IDs, then subset that to get the empty one. 

1736 # Repeat the above tests starting from that. 

1737 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1738 self.checkQueryResults(dataIds, [dataId]) 

1739 self.checkQueryResults( 

1740 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1741 [dataset1, dataset2], 

1742 ) 

1743 self.checkQueryResults( 

1744 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1745 [dataset1], 

1746 ) 

1747 self.checkQueryResults( 

1748 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1749 [dataset2], 

1750 ) 

1751 with dataIds.materialize() as dataIds: 

1752 self.checkQueryResults(dataIds, [dataId]) 

1753 self.checkQueryResults( 

1754 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1755 [dataset1, dataset2], 

1756 ) 

1757 self.checkQueryResults( 

1758 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1759 [dataset1], 

1760 ) 

1761 self.checkQueryResults( 

1762 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1763 [dataset2], 

1764 ) 

1765 # Query for non-empty data IDs, then materialize, then subset to get 

1766 # the empty one. Repeat again. 

1767 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1768 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1769 self.checkQueryResults(dataIds, [dataId]) 

1770 self.checkQueryResults( 

1771 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1772 [dataset1, dataset2], 

1773 ) 

1774 self.checkQueryResults( 

1775 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1776 [dataset1], 

1777 ) 

1778 self.checkQueryResults( 

1779 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1780 [dataset2], 

1781 ) 

1782 with dataIds.materialize() as dataIds: 

1783 self.checkQueryResults(dataIds, [dataId]) 

1784 self.checkQueryResults( 

1785 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1786 [dataset1, dataset2], 

1787 ) 

1788 self.checkQueryResults( 

1789 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1790 [dataset1], 

1791 ) 

1792 self.checkQueryResults( 

1793 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1794 [dataset2], 

1795 ) 

1796 # Query for non-empty data IDs with a constraint on an empty-data-ID 

1797 # dataset that exists. 

1798 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...) 

1799 self.checkQueryResults( 

1800 dataIds.subset(unique=True), 

1801 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)], 

1802 ) 

1803 # Again query for non-empty data IDs with a constraint on empty-data-ID 

1804 # datasets, but when the datasets don't exist. We delete the existing 

1805 # dataset and query just that collection rather than creating a new 

1806 # empty collection because this is a bit less likely for our build-time 

1807 # logic to shortcut-out (via the collection summaries), and such a 

1808 # shortcut would make this test a bit more trivial than we'd like. 

1809 registry.removeDatasets([dataset2]) 

1810 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2) 

1811 self.checkQueryResults(dataIds, []) 

1812 

1813 def testDimensionDataModifications(self): 

1814 """Test that modifying dimension records via: 

1815 syncDimensionData(..., update=True) and 

1816 insertDimensionData(..., replace=True) works as expected, even in the 

1817 presence of datasets using those dimensions and spatial overlap 

1818 relationships. 

1819 """ 

1820 

1821 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1822 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1823 for begin, end in ranges: 

1824 yield from range(begin, end) 

1825 

1826 def range_set_hull( 

1827 ranges: lsst.sphgeom.RangeSet, 

1828 pixelization: lsst.sphgeom.HtmPixelization, 

1829 ) -> lsst.sphgeom.ConvexPolygon: 

1830 """Create a ConvexPolygon hull of the region defined by a set of 

1831 HTM pixelization index ranges. 

1832 """ 

1833 points = [] 

1834 for index in unpack_range_set(ranges): 

1835 points.extend(pixelization.triangle(index).getVertices()) 

1836 return lsst.sphgeom.ConvexPolygon(points) 

1837 
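# Tiny usage note for the helpers above (illustrative, not executed here):
# scaling a one-trixel RangeSet by 4 yields that trixel's four children at
# the next HTM level, e.g.
#     list(unpack_range_set(lsst.sphgeom.RangeSet(12288).scaled(4)))
#     == [49152, 49153, 49154, 49155]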

1838 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1839 # and four child regions (the trixels within the parent at the next 

1840 # level). We'll use the parent as a tract/visit region and the children

1841 # as its patch/visit_detector regions. 

1842 registry = self.makeRegistry() 

1843 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1844 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1845 index = 12288 

1846 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 

1847 assert htm6.universe().contains(child_ranges_small) 

1848 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)] 

1849 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1850 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1851 ) 

1852 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1853 # Make a larger version of each child region, defined to be the set of 

1854 # htm6 trixels that overlap the original's bounding circle. Make a new 

1855 # parent that's the convex hull of the new children. 

1856 child_regions_large = [ 

1857 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1858 ] 

1859 assert all( 

1860 large.contains(small) 

1861 for large, small in zip(child_regions_large, child_regions_small, strict=True) 

1862 ) 

1863 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1864 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1865 ) 

1866 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1867 assert parent_region_large.contains(parent_region_small) 

1868 assert not parent_region_small.contains(parent_region_large) 

1869 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1870 # Find some commonSkyPix indices that overlap the large regions but not 

1871 # overlap the small regions. We use commonSkyPix here to make sure the 

1872 # real tests later involve what's in the database, not just post-query 

1873 # filtering of regions. 

1874 child_difference_indices = [] 

1875 for large, small in zip(child_regions_large, child_regions_small, strict=True): 

1876 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1877 assert difference, "if this is empty, we can't test anything useful with these regions" 

1878 assert all( 

1879 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1880 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1881 for d in difference 

1882 ) 

1883 child_difference_indices.append(difference) 

1884 parent_difference_indices = list( 

1885 unpack_range_set( 

1886 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1887 ) 

1888 ) 

1889 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1890 assert all( 

1891 ( 

1892 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1893 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1894 ) 

1895 for d in parent_difference_indices 

1896 ) 

1897 # Now that we've finally got those regions, we'll insert the large ones 

1898 # as tract/patch dimension records. 

1899 skymap_name = "testing_v1" 

1900 registry.insertDimensionData( 

1901 "skymap", 

1902 { 

1903 "name": skymap_name, 

1904 "hash": bytes([42]), 

1905 "tract_max": 1, 

1906 "patch_nx_max": 2, 

1907 "patch_ny_max": 2, 

1908 }, 

1909 ) 

1910 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

1911 registry.insertDimensionData( 

1912 "patch", 

1913 *[ 

1914 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1915 for n, c in enumerate(child_regions_large) 

1916 ], 

1917 ) 

1918 # Add a dataset that uses these dimensions to make sure that modifying

1919 # them doesn't disrupt foreign keys (need to make sure DB doesn't 

1920 # implement insert with replace=True as delete-then-insert). 

1921 dataset_type = DatasetType( 

1922 "coadd", 

1923 dimensions=["tract", "patch"], 

1924 universe=registry.dimensions, 

1925 storageClass="Exposure", 

1926 ) 

1927 registry.registerDatasetType(dataset_type) 

1928 registry.registerCollection("the_run", CollectionType.RUN) 

1929 registry.insertDatasets( 

1930 dataset_type, 

1931 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

1932 run="the_run", 

1933 ) 

1934 # Query for tracts and patches that overlap some "difference"

1935 # commonSkyPix pixels; there should be overlaps, because the database has

1936 # the "large" suite of regions. 

1937 self.assertEqual( 

1938 {0}, 

1939 { 

1940 data_id["tract"] 

1941 for data_id in registry.queryDataIds( 

1942 ["tract"], 

1943 skymap=skymap_name, 

1944 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1945 ) 

1946 }, 

1947 ) 

1948 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1949 self.assertIn( 

1950 patch_id, 

1951 { 

1952 data_id["patch"] 

1953 for data_id in registry.queryDataIds( 

1954 ["patch"], 

1955 skymap=skymap_name, 

1956 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1957 ) 

1958 }, 

1959 ) 

1960 # Use sync to update the tract region and insert to update the regions 

1961 # of the patches, to the "small" suite. 

1962 updated = registry.syncDimensionData( 

1963 "tract", 

1964 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

1965 update=True, 

1966 ) 

1967 self.assertEqual(updated, {"region": parent_region_large}) 

1968 registry.insertDimensionData( 

1969 "patch", 

1970 *[ 

1971 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1972 for n, c in enumerate(child_regions_small) 

1973 ], 

1974 replace=True, 

1975 ) 

1976 # Query again; there now should be no such overlaps, because the 

1977 # database has the "small" suite of regions. 

1978 self.assertFalse( 

1979 set( 

1980 registry.queryDataIds( 

1981 ["tract"], 

1982 skymap=skymap_name, 

1983 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1984 ) 

1985 ) 

1986 ) 

1987 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1988 self.assertNotIn( 

1989 patch_id, 

1990 { 

1991 data_id["patch"] 

1992 for data_id in registry.queryDataIds( 

1993 ["patch"], 

1994 skymap=skymap_name, 

1995 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1996 ) 

1997 }, 

1998 ) 

1999 # Update back to the large regions and query one more time. 

2000 updated = registry.syncDimensionData( 

2001 "tract", 

2002 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

2003 update=True, 

2004 ) 

2005 self.assertEqual(updated, {"region": parent_region_small}) 

2006 registry.insertDimensionData( 

2007 "patch", 

2008 *[ 

2009 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

2010 for n, c in enumerate(child_regions_large) 

2011 ], 

2012 replace=True, 

2013 ) 

2014 self.assertEqual( 

2015 {0}, 

2016 { 

2017 data_id["tract"] 

2018 for data_id in registry.queryDataIds( 

2019 ["tract"], 

2020 skymap=skymap_name, 

2021 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

2022 ) 

2023 }, 

2024 ) 

2025 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

2026 self.assertIn( 

2027 patch_id, 

2028 { 

2029 data_id["patch"] 

2030 for data_id in registry.queryDataIds( 

2031 ["patch"], 

2032 skymap=skymap_name, 

2033 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

2034 ) 

2035 }, 

2036 ) 

2037 
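# A dict-based sketch of the sync-with-update contract verified above:
# syncDimensionData(..., update=True) applies the new values and reports each
# field it changed mapped to that field's *previous* value. This stand-in
# assumes all keys already exist and returns an empty mapping for a no-op.
def sync_update(record: dict, new_values: dict) -> dict:
    changed = {k: record[k] for k, v in new_values.items() if record[k] != v}
    record.update(new_values)
    return changed

tract = {"skymap": "testing_v1", "id": 0, "region": "large"}
assert sync_update(tract, {"region": "small"}) == {"region": "large"}
assert sync_update(tract, {"region": "small"}) == {}  # already up to date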

2038 def testCalibrationCollections(self): 

2039 """Test operations on `~CollectionType.CALIBRATION` collections, 

2040 including `SqlRegistry.certify`, `SqlRegistry.decertify`, 

2041 `SqlRegistry.findDataset`, and 

2042 `DataCoordinateQueryResults.findRelatedDatasets`. 

2043 """ 

2044 # Setup - make a Registry, fill it with some datasets in 

2045 # non-calibration collections. 

2046 registry = self.makeRegistry() 

2047 self.loadData(registry, "base.yaml") 

2048 self.loadData(registry, "datasets.yaml") 

2049 # Set up some timestamps. 

2050 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

2051 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

2052 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

2053 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

2054 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

2055 allTimespans = [ 

2056 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

2057 ] 

2058 # Insert some exposure records with timespans between each sequential 

2059 # pair of those. 

2060 registry.insertDimensionData( 

2061 "exposure", 

2062 { 

2063 "instrument": "Cam1", 

2064 "id": 0, 

2065 "obs_id": "zero", 

2066 "physical_filter": "Cam1-G", 

2067 "timespan": Timespan(t1, t2), 

2068 }, 

2069 { 

2070 "instrument": "Cam1", 

2071 "id": 1, 

2072 "obs_id": "one", 

2073 "physical_filter": "Cam1-G", 

2074 "timespan": Timespan(t2, t3), 

2075 }, 

2076 { 

2077 "instrument": "Cam1", 

2078 "id": 2, 

2079 "obs_id": "two", 

2080 "physical_filter": "Cam1-G", 

2081 "timespan": Timespan(t3, t4), 

2082 }, 

2083 { 

2084 "instrument": "Cam1", 

2085 "id": 3, 

2086 "obs_id": "three", 

2087 "physical_filter": "Cam1-G", 

2088 "timespan": Timespan(t4, t5), 

2089 }, 

2090 ) 

2091 # Get references to some datasets. 

2092 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

2093 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

2094 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

2095 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

2096 # Register the main calibration collection we'll be working with. 

2097 collection = "Cam1/calibs/default" 

2098 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

2099 # Cannot associate into a calibration collection (no timespan). 

2100 with self.assertRaises(CollectionTypeError): 

2101 registry.associate(collection, [bias2a]) 

2102 # Certify 2a dataset with [t2, t4) validity. 

2103 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

2104 # Test that we can query for this dataset via the new collection, both 

2105 # on its own and with a RUN collection. 

2106 self.assertEqual( 

2107 set(registry.queryDatasets("bias", findFirst=False, collections=collection)), 

2108 {bias2a}, 

2109 ) 

2110 self.assertEqual( 

2111 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])), 

2112 { 

2113 bias2a, 

2114 bias2b, 

2115 bias3b, 

2116 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2117 }, 

2118 ) 

2119 self.assertEqual( 

2120 set(registry.queryDataIds("detector", datasets="bias", collections=collection)), 

2121 {registry.expandDataId(instrument="Cam1", detector=2)}, 

2122 ) 

2123 self.assertEqual( 

2124 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])), 

2125 { 

2126 registry.expandDataId(instrument="Cam1", detector=2), 

2127 registry.expandDataId(instrument="Cam1", detector=3), 

2128 registry.expandDataId(instrument="Cam1", detector=4), 

2129 }, 

2130 ) 

2131 self.assertEqual( 

2132 set( 

2133 registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets( 

2134 "bias", findFirst=True, collections=[collection] 

2135 ) 

2136 ), 

2137 { 

2138 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a), 

2139 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a), 

2140 }, 

2141 ) 

2142 self.assertEqual( 

2143 set( 

2144 registry.queryDataIds( 

2145 ["exposure", "detector"], instrument="Cam1", detector=2 

2146 ).findRelatedDatasets("bias", findFirst=True, collections=[collection, "imported_r"]) 

2147 ), 

2148 { 

2149 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a), 

2150 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a), 

2151 (registry.expandDataId(instrument="Cam1", detector=2, exposure=0), bias2b), 

2152 (registry.expandDataId(instrument="Cam1", detector=2, exposure=3), bias2b), 

2153 }, 

2154 ) 

2155 

2156 # We should not be able to certify 2b with anything overlapping that 

2157 # window. 

2158 with self.assertRaises(ConflictingDefinitionError): 

2159 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

2160 with self.assertRaises(ConflictingDefinitionError): 

2161 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

2162 with self.assertRaises(ConflictingDefinitionError): 

2163 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

2164 with self.assertRaises(ConflictingDefinitionError): 

2165 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

2166 with self.assertRaises(ConflictingDefinitionError): 

2167 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

2168 with self.assertRaises(ConflictingDefinitionError): 

2169 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

2170 with self.assertRaises(ConflictingDefinitionError): 

2171 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

2172 with self.assertRaises(ConflictingDefinitionError): 

2173 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

2174 # We should be able to certify 3a with a range overlapping that window, 

2175 # because it's for a different detector. 

2176 # We'll certify 3a over [t1, t3). 

2177 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

2178 # Now we'll certify 2b and 3b together over [t4, ∞). 

2179 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

2180 

2181 # Fetch all associations and check that they are what we expect. 

2182 self.assertCountEqual( 

2183 list( 

2184 registry.queryDatasetAssociations( 

2185 "bias", 

2186 collections=[collection, "imported_g", "imported_r"], 

2187 ) 

2188 ), 

2189 [ 

2190 DatasetAssociation( 

2191 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

2192 collection="imported_g", 

2193 timespan=None, 

2194 ), 

2195 DatasetAssociation( 

2196 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2197 collection="imported_r", 

2198 timespan=None, 

2199 ), 

2200 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

2201 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

2202 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

2203 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

2204 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

2205 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

2206 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2207 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2208 ], 

2209 ) 

2210 

2211 class Ambiguous: 

2212 """Tag class to denote lookups that should be ambiguous.""" 

2213 

2214 pass 

2215 

2216 def assertLookup( 

2217 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None 

2218 ) -> None: 

2219 """Local function that asserts that a bias lookup returns the given 

2220 expected result. 

2221 """ 

2222 if expected is Ambiguous: 

2223 with self.assertRaises((DatasetTypeError, LookupError)): 

2224 registry.findDataset( 

2225 "bias", 

2226 collections=collection, 

2227 instrument="Cam1", 

2228 detector=detector, 

2229 timespan=timespan, 

2230 ) 

2231 else: 

2232 self.assertEqual( 

2233 expected, 

2234 registry.findDataset( 

2235 "bias", 

2236 collections=collection, 

2237 instrument="Cam1", 

2238 detector=detector, 

2239 timespan=timespan, 

2240 ), 

2241 ) 

2242 

2243 # Systematically test lookups against expected results. 

2244 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2245 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2246 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2247 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2248 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

2249 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2250 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2251 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2252 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2253 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

2254 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2255 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2256 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2257 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

2258 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2259 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

2260 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

2261 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

2262 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

2263 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2264 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2265 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2266 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2267 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2268 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2269 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

2270 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2271 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2272 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2273 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2274 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

2275 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2276 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2277 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2278 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2279 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2280 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2281 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2282 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2283 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2284 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2285 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2286 

2287 # Test lookups via temporal joins to exposures. 

2288 self.assertEqual( 

2289 set( 

2290 registry.queryDataIds( 

2291 ["exposure", "detector"], instrument="Cam1", detector=2 

2292 ).findRelatedDatasets("bias", collections=[collection]) 

2293 ), 

2294 { 

2295 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a), 

2296 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a), 

2297 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b), 

2298 }, 

2299 ) 

2300 self.assertEqual( 

2301 set( 

2302 registry.queryDataIds( 

2303 ["exposure", "detector"], instrument="Cam1", detector=3 

2304 ).findRelatedDatasets("bias", collections=[collection]) 

2305 ), 

2306 { 

2307 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a), 

2308 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a), 

2309 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b), 

2310 }, 

2311 ) 

2312 self.assertEqual( 

2313 set( 

2314 registry.queryDataIds( 

2315 ["exposure", "detector"], instrument="Cam1", detector=2 

2316 ).findRelatedDatasets("bias", collections=[collection, "imported_g"]) 

2317 ), 

2318 { 

2319 (registry.expandDataId(instrument="Cam1", exposure=0, detector=2), bias2a), 

2320 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a), 

2321 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a), 

2322 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b), 

2323 }, 

2324 ) 

2325 self.assertEqual( 

2326 set( 

2327 registry.queryDataIds( 

2328 ["exposure", "detector"], instrument="Cam1", detector=3 

2329 ).findRelatedDatasets("bias", collections=[collection, "imported_g"]) 

2330 ), 

2331 { 

2332 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a), 

2333 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a), 

2334 (registry.expandDataId(instrument="Cam1", exposure=2, detector=3), bias3a), 

2335 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b), 

2336 }, 

2337 ) 

2338 

2339 # Decertify [t3, t5) for all data IDs, and do test lookups again. 

2340 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2341 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2342 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2343 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2344 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2345 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2346 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2347 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2348 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2349 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2350 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2351 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2352 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2353 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2354 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2355 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2356 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2357 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2358 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2359 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2360 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2361 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2362 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2363 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2364 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2365 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2366 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2367 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2368 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2369 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2370 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2371 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2372 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2373 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2374 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2375 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2376 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2377 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2378 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2379 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2380 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2381 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2382 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2383 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2384 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2385 

2386 # Decertify everything, this time with explicit data IDs, then check 

2387 # that no lookups succeed. 

2388 registry.decertify( 

2389 collection, 

2390 "bias", 

2391 Timespan(None, None), 

2392 dataIds=[ 

2393 dict(instrument="Cam1", detector=2), 

2394 dict(instrument="Cam1", detector=3), 

2395 ], 

2396 ) 

2397 for detector in (2, 3): 

2398 for timespan in allTimespans: 

2399 assertLookup(detector=detector, timespan=timespan, expected=None) 

2400 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2401 # those. 

2402 registry.certify( 

2403 collection, 

2404 [bias2a, bias3a], 

2405 Timespan(None, None), 

2406 ) 

2407 for timespan in allTimespans: 

2408 assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2409 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2410 # Decertify just bias2 over [t2, t4). 

2411 # This should split a single certification row into two (and leave the 

2412 # other existing row, for bias3a, alone). 

2413 registry.decertify( 

2414 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2415 ) 

2416 for timespan in allTimespans: 

2417 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2418 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2419 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2420 if overlapsBefore and overlapsAfter: 

2421 expected = Ambiguous 

2422 elif overlapsBefore or overlapsAfter: 

2423 expected = bias2a 

2424 else: 

2425 expected = None 

2426 assertLookup(detector=2, timespan=timespan, expected=expected) 

2427 
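# A registry-free sketch of the decertify bookkeeping checked in the loop
# above: removing [t2, t4) from a (-inf, inf) certification leaves two rows,
# (-inf, t2) and [t4, inf). A lookup timespan that overlaps both rows is
# ambiguous, one that overlaps exactly one is a unique hit, and one that
# overlaps neither is a miss. Floats with +/-inf model unbounded Timespans.
import math

def overlaps(a: tuple[float, float], b: tuple[float, float]) -> bool:
    # Half-open intervals [begin, end).
    return a[0] < b[1] and b[0] < a[1]

t2, t4 = 2.0, 4.0
remaining = [(-math.inf, t2), (t4, math.inf)]  # rows left after decertify

def classify(lookup: tuple[float, float]) -> str:
    hits = [r for r in remaining if overlaps(lookup, r)]
    return "ambiguous" if len(hits) > 1 else ("hit" if hits else "miss")

assert classify((1.0, 5.0)) == "ambiguous"  # spans both remaining pieces
assert classify((1.0, 2.0)) == "hit"        # only the early piece
assert classify((2.0, 4.0)) == "miss"       # exactly the decertified window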

2428 def testSkipCalibs(self): 

2429 """Test how queries handle skipping of calibration collections.""" 

2430 registry = self.makeRegistry() 

2431 self.loadData(registry, "base.yaml") 

2432 self.loadData(registry, "datasets.yaml") 

2433 

2434 coll_calib = "Cam1/calibs/default" 

2435 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2436 

2437 # Add all biases to the calibration collection. 

2438 # Without this, the logic that prunes dataset subqueries based on 

2439 # datasetType-collection summary information will fire before the logic 

2440 # we want to test below. This is a good thing (it avoids the dreaded 

2441 # NotImplementedError a bit more often) everywhere but here. 

2442 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2443 

2444 coll_list = [coll_calib, "imported_g", "imported_r"] 

2445 chain = "Cam1/chain" 

2446 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2447 registry.setCollectionChain(chain, coll_list) 

2448 

2449 # explicit list will raise if findFirst=True or there are temporal 

2450 # dimensions 

2451 with self.assertRaises(NotImplementedError): 

2452 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2453 with self.assertRaises(NotImplementedError): 

2454 registry.queryDataIds( 

2455 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2456 ).count() 

2457 

2458 # chain will skip 

2459 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2460 self.assertGreater(len(datasets), 0) 

2461 

2462 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2463 self.assertGreater(len(dataIds), 0) 

2464 

2465 # glob will skip too 

2466 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2467 self.assertGreater(len(datasets), 0) 

2468 

2469 # regular expression will skip too 

2470 pattern = re.compile(".*") 

2471 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2472 self.assertGreater(len(datasets), 0) 

2473 

2474 # ellipsis should work as usual 

2475 datasets = list(registry.queryDatasets("bias", collections=...)) 

2476 self.assertGreater(len(datasets), 0) 

2477 

2478 # a few tests with findFirst

2479 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2480 self.assertGreater(len(datasets), 0) 

2481 
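# Illustrative sketch (editorial addition, not part of this file): the
# pattern exercised above, written as a helper. An explicit collections
# list that mixes in a CALIBRATION collection raises NotImplementedError
# when findFirst=True, while routing the same search through a CHAINED
# collection lets the query skip the calibration collection. The helper
# name and the public import path are assumptions.
from lsst.daf.butler.registry import CollectionType

def query_via_chain(registry, dataset_type_name, collections, chain_name):
    """Run a find-first dataset query through a temporary CHAINED collection."""
    registry.registerCollection(chain_name, type=CollectionType.CHAINED)
    registry.setCollectionChain(chain_name, collections)
    return list(registry.queryDatasets(dataset_type_name, collections=chain_name, findFirst=True))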

2482 def testIngestTimeQuery(self): 

2483 registry = self.makeRegistry() 

2484 self.loadData(registry, "base.yaml") 

2485 dt0 = datetime.datetime.now(datetime.UTC) 

2486 self.loadData(registry, "datasets.yaml") 

2487 dt1 = datetime.datetime.now(datetime.UTC) 

2488 

2489 datasets = list(registry.queryDatasets(..., collections=...)) 

2490 len0 = len(datasets) 

2491 self.assertGreater(len0, 0) 

2492 

2493 where = "ingest_date > T'2000-01-01'" 

2494 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2495 len1 = len(datasets) 

2496 self.assertEqual(len0, len1) 

2497 

2498 # no one will ever use this piece of software in 30 years 

2499 where = "ingest_date > T'2050-01-01'" 

2500 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2501 len2 = len(datasets) 

2502 self.assertEqual(len2, 0) 

2503 

2504 # Check more exact timing to make sure there is no 37-second offset

2505 # (after fixing DM-30124). SQLite time precision is 1 second, so make

2506 # sure that we don't test with higher precision.

2507 tests = [ 

2508 # format: (timestamp, operator, expected_len) 

2509 (dt0 - timedelta(seconds=1), ">", len0), 

2510 (dt0 - timedelta(seconds=1), "<", 0), 

2511 (dt1 + timedelta(seconds=1), "<", len0), 

2512 (dt1 + timedelta(seconds=1), ">", 0), 

2513 ] 

2514 for dt, op, expect_len in tests: 

2515 dt_str = dt.isoformat(sep=" ") 

2516 

2517 where = f"ingest_date {op} T'{dt_str}'" 

2518 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2519 self.assertEqual(len(datasets), expect_len) 

2520 

2521 # same with bind using datetime or astropy Time 

2522 where = f"ingest_date {op} ingest_time" 

2523 datasets = list( 

2524 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2525 ) 

2526 self.assertEqual(len(datasets), expect_len) 

2527 

2528 dt_astropy = astropy.time.Time(dt, format="datetime") 

2529 datasets = list( 

2530 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2531 ) 

2532 self.assertEqual(len(datasets), expect_len) 

2533 
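# Illustrative sketch (editorial addition, not part of this file): the two
# equivalent ways of constraining on ingest_date that the test exercises.
# A T'...' literal embeds an ISO timestamp in the expression string, while
# `bind` passes a datetime (or astropy Time) by name and avoids string
# formatting. The helper name is an assumption; `registry` is a SqlRegistry
# as built in the test.
import datetime

def datasets_ingested_after(registry, dt: datetime.datetime):
    literal = list(
        registry.queryDatasets(
            ..., collections=..., where=f"ingest_date > T'{dt.isoformat(sep=' ')}'"
        )
    )
    bound = list(
        registry.queryDatasets(..., collections=..., where="ingest_date > t0", bind={"t0": dt})
    )
    assert len(literal) == len(bound)  # the two forms express the same constraint
    return bound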

2534 def testTimespanQueries(self): 

2535 """Test query expressions involving timespans.""" 

2536 registry = self.makeRegistry() 

2537 self.loadData(registry, "hsc-rc2-subset.yaml") 

2538 # All visits in the database; mapping from ID to timespan.

2539 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2540 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2541 # exposure IDs are monotonically increasing). 

2542 ids = sorted(visits.keys()) 

2543 self.assertGreater(len(ids), 20) 

2544 # Pick some quasi-random indexes into `ids` to play with. 

2545 i1 = int(len(ids) * 0.1) 

2546 i2 = int(len(ids) * 0.3) 

2547 i3 = int(len(ids) * 0.6) 

2548 i4 = int(len(ids) * 0.8) 

2549 # Extract some times from those: just before the beginning of i1 (which 

2550 # should be after the end of the exposure before), exactly the 

2551 # beginning of i2, just after the beginning of i3 (and before its end), 

2552 # and the exact end of i4. 

2553 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2554 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2555 t2 = visits[ids[i2]].begin 

2556 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2557 self.assertLess(t3, visits[ids[i3]].end) 

2558 t4 = visits[ids[i4]].end 

2559 # Make sure those are actually in order. 

2560 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2561 

2562 bind = { 

2563 "t1": t1, 

2564 "t2": t2, 

2565 "t3": t3, 

2566 "t4": t4, 

2567 "ts23": Timespan(t2, t3), 

2568 } 

2569 

2570 def query(where): 

2571 """Return results as a sorted, deduplicated list of visit IDs.""" 

2572 return sorted( 

2573 { 

2574 dataId["visit"] 

2575 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2576 } 

2577 ) 

2578 

2579 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2580 # where they appear in the expression, and how we get the timespan into 

2581 # the expression. 

2582 

2583 # t1 is before the start of i1, so this should not include i1. 

2584 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2585 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2586 # should not include i2. 

2587 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2588 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2589 # t3 is in the middle of i3, so this should include i3. 

2590 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2591 # This one should not include i3, by the same reasoning.

2592 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2593 # t4 is exactly at the end of i4, so this should include i4. 

2594 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2595 # i4's upper bound of t4 is exclusive, so this should not include i4.

2596 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2597 

2598 # Now some timespan vs. time scalar queries. 

2599 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2600 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2601 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2602 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2603 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2604 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2605 

2606 # Empty timespans should not overlap anything. 

2607 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 

2608 
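# Illustrative sketch (editorial addition, not part of this file): the
# half-open interval semantics the assertions above rely on. Timespans are
# [begin, end): the end bound is exclusive, so spans that merely touch do
# not overlap, and a None bound means unbounded.
import astropy.time
from lsst.daf.butler import Timespan

a = astropy.time.Time("2020-01-01T00:00:00", format="isot", scale="tai")
b = astropy.time.Time("2020-01-02T00:00:00", format="isot", scale="tai")
c = astropy.time.Time("2020-01-03T00:00:00", format="isot", scale="tai")

assert not Timespan(a, b).overlaps(Timespan(b, c))    # touching at b: no overlap
assert Timespan(a, c).overlaps(Timespan(b, c))        # genuine overlap
assert Timespan(None, None).overlaps(Timespan(a, b))  # unbounded overlaps everything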

2609 def testCollectionSummaries(self): 

2610 """Test recording and retrieval of collection summaries.""" 

2611 self.maxDiff = None 

2612 registry = self.makeRegistry() 

2613 # Importing datasets from yaml should go through the code path where 

2614 # we update collection summaries as we insert datasets. 

2615 self.loadData(registry, "base.yaml") 

2616 self.loadData(registry, "datasets.yaml") 

2617 flat = registry.getDatasetType("flat") 

2618 expected1 = CollectionSummary() 

2619 expected1.dataset_types.add(registry.getDatasetType("bias")) 

2620 expected1.add_data_ids( 

2621 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)] 

2622 ) 

2623 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2624 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2625 # Create a chained collection with both of the imported runs; the 

2626 # summary should be the same, because it's a union with itself. 

2627 chain = "chain" 

2628 registry.registerCollection(chain, CollectionType.CHAINED) 

2629 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2630 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2631 # Associate flats only into a tagged collection and a calibration 

2632 # collection to check summaries of those. 

2633 tag = "tag" 

2634 registry.registerCollection(tag, CollectionType.TAGGED) 

2635 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2636 calibs = "calibs" 

2637 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2638 registry.certify( 

2639 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2640 ) 

2641 expected2 = expected1.copy() 

2642 expected2.dataset_types.discard("bias") 

2643 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2644 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2645 # Explicitly calling SqlRegistry.refresh() should load those same 

2646 # summaries, via a totally different code path. 

2647 registry.refresh() 

2648 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2649 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2650 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2651 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2652 
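# Illustrative sketch (editorial addition, not part of this file): collection
# summaries make it cheap to ask whether a collection can possibly contain a
# dataset type before running a full query. `might_contain` is an assumed
# helper name; `summary.dataset_types` is the set-like of DatasetType objects
# (keyed by name) used by the assertions above.
def might_contain(registry, collection: str, dataset_type_name: str) -> bool:
    summary = registry.getCollectionSummary(collection)
    return dataset_type_name in summary.dataset_types.names

# With the collections built above, one would expect, e.g.:
#   might_contain(registry, "tag", "flat")  -> True
#   might_contain(registry, "tag", "bias")  -> False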

2653 def testBindInQueryDatasets(self): 

2654 """Test that the bind parameter is correctly forwarded in 

2655 queryDatasets recursion. 

2656 """ 

2657 registry = self.makeRegistry() 

2658 # Importing datasets from yaml should go through the code path where 

2659 # we update collection summaries as we insert datasets. 

2660 self.loadData(registry, "base.yaml") 

2661 self.loadData(registry, "datasets.yaml") 

2662 self.assertEqual( 

2663 set(registry.queryDatasets("flat", band="r", collections=...)), 

2664 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2665 ) 

2666 

2667 def testQueryIntRangeExpressions(self): 

2668 """Test integer range expressions in ``where`` arguments. 

2669 

2670 Note that our expressions use inclusive stop values, unlike Python's. 

2671 """ 

2672 registry = self.makeRegistry() 

2673 self.loadData(registry, "base.yaml") 

2674 self.assertEqual( 

2675 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")), 

2676 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]}, 

2677 ) 

2678 self.assertEqual( 

2679 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")), 

2680 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]}, 

2681 ) 

2682 self.assertEqual( 

2683 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")), 

2684 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]}, 

2685 ) 

2686 
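# Illustrative sketch (editorial addition, not part of this file): the range
# syntax above, `(start..stop)` or `(start..stop:stride)`, has an inclusive
# stop, so it corresponds to Python's range(start, stop + 1, stride).
def expand_range_expression(start: int, stop: int, stride: int = 1) -> list[int]:
    """Expand an inclusive-stop range like the `detector IN (1..4:2)` syntax."""
    return list(range(start, stop + 1, stride))

assert expand_range_expression(1, 2) == [1, 2]     # IN (1..2)
assert expand_range_expression(1, 4, 2) == [1, 3]  # IN (1..4:2)
assert expand_range_expression(2, 4, 2) == [2, 4]  # IN (2..4:2)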

2687 def testQueryResultSummaries(self): 

2688 """Test summary methods like `count`, `any`, and `explain_no_results` 

2689 on `DataCoordinateQueryResults` and `DatasetQueryResults`. 

2690 """ 

2691 registry = self.makeRegistry() 

2692 self.loadData(registry, "base.yaml") 

2693 self.loadData(registry, "datasets.yaml") 

2694 self.loadData(registry, "spatial.yaml") 

2695 # Default test dataset has two collections, each with both flats and 

2696 # biases. Add a new collection with only biases. 

2697 registry.registerCollection("biases", CollectionType.TAGGED) 

2698 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2699 # First query yields two results, and involves no postprocessing. 

2700 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2701 self.assertTrue(query1.any(execute=False, exact=False)) 

2702 self.assertTrue(query1.any(execute=True, exact=False)) 

2703 self.assertTrue(query1.any(execute=True, exact=True)) 

2704 self.assertEqual(query1.count(exact=False), 2) 

2705 self.assertEqual(query1.count(exact=True), 2) 

2706 self.assertFalse(list(query1.explain_no_results())) 

2707 # Second query should yield no results, and all of the summary

2708 # methods should reflect that.

2709 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2710 # There's no execute=False, exact=False test here because the behavior

2711 # is not something we want to guarantee in this case (and exact=False

2712 # says either answer is legal).

2713 self.assertFalse(query2.any(execute=True, exact=False)) 

2714 self.assertFalse(query2.any(execute=True, exact=True)) 

2715 self.assertEqual(query2.count(exact=False), 0) 

2716 self.assertEqual(query2.count(exact=True), 0) 

2717 self.assertTrue(list(query2.explain_no_results())) 

2718 # These queries yield no results due to various problems that can be 

2719 # spotted prior to execution, yielding helpful diagnostics. 

2720 base_query = registry.queryDataIds(["detector", "physical_filter"]) 

2721 queries_and_snippets = [ 

2722 ( 

2723 # Dataset type name doesn't match any existing dataset types. 

2724 registry.queryDatasets("nonexistent", collections=...), 

2725 ["nonexistent"], 

2726 ), 

2727 ( 

2728 # Dataset type object isn't registered. 

2729 registry.queryDatasets( 

2730 DatasetType( 

2731 "nonexistent", 

2732 dimensions=["instrument"], 

2733 universe=registry.dimensions, 

2734 storageClass="Image", 

2735 ), 

2736 collections=..., 

2737 ), 

2738 ["nonexistent"], 

2739 ), 

2740 ( 

2741 # No datasets of this type in this collection. 

2742 registry.queryDatasets("flat", collections=["biases"]), 

2743 ["flat", "biases"], 

2744 ), 

2745 ( 

2746 # No datasets of this type in this collection. 

2747 base_query.findDatasets("flat", collections=["biases"]), 

2748 ["flat", "biases"], 

2749 ), 

2750 ( 

2751 # No collections matching at all. 

2752 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2753 ["potato"], 

2754 ), 

2755 ] 

2756 # The behavior of these additional queries is slated to change in the 

2757 # future, so we also check for deprecation warnings. 

2758 with self.assertWarns(FutureWarning): 

2759 queries_and_snippets.append( 

2760 ( 

2761 # Dataset type name doesn't match any existing dataset 

2762 # types. 

2763 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...), 

2764 ["nonexistent"], 

2765 ) 

2766 ) 

2767 with self.assertWarns(FutureWarning): 

2768 queries_and_snippets.append( 

2769 ( 

2770 # Dataset type name doesn't match any existing dataset 

2771 # types. 

2772 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...), 

2773 ["nonexistent"], 

2774 ) 

2775 ) 

2776 for query, snippets in queries_and_snippets: 

2777 self.assertFalse(query.any(execute=False, exact=False)) 

2778 self.assertFalse(query.any(execute=True, exact=False)) 

2779 self.assertFalse(query.any(execute=True, exact=True)) 

2780 self.assertEqual(query.count(exact=False), 0) 

2781 self.assertEqual(query.count(exact=True), 0) 

2782 messages = list(query.explain_no_results()) 

2783 self.assertTrue(messages) 

2784 # Want all expected snippets to appear in at least one message. 

2785 self.assertTrue( 

2786 any( 

2787 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2788 ), 

2789 messages, 

2790 ) 

2791 

2792 # This query does yield results, but should also emit a warning because

2793 # passing dataset type patterns to queryDataIds is deprecated; just look

2794 # for the warning.

2795 with self.assertWarns(FutureWarning): 

2796 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...) 

2797 

2798 # These queries yield no results due to problems that can be identified 

2799 # by cheap follow-up queries, yielding helpful diagnostics. 

2800 for query, snippets in [ 

2801 ( 

2802 # No records for one of the involved dimensions. 

2803 registry.queryDataIds(["subfilter"]), 

2804 ["no rows", "subfilter"], 

2805 ), 

2806 ( 

2807 # No records for one of the involved dimensions. 

2808 registry.queryDimensionRecords("subfilter"), 

2809 ["no rows", "subfilter"], 

2810 ), 

2811 ]: 

2812 self.assertFalse(query.any(execute=True, exact=False)) 

2813 self.assertFalse(query.any(execute=True, exact=True)) 

2814 self.assertEqual(query.count(exact=True), 0) 

2815 messages = list(query.explain_no_results()) 

2816 self.assertTrue(messages) 

2817 # Want all expected snippets to appear in at least one message. 

2818 self.assertTrue( 

2819 any( 

2820 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2821 ), 

2822 messages, 

2823 ) 

2824 

2825 # This query yields four overlaps in the database, but one is filtered 

2826 # out in postprocessing. The count queries aren't accurate because 

2827 # they don't account for duplication that happens due to an internal 

2828 # join against commonSkyPix. 

2829 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2830 self.assertEqual( 

2831 { 

2832 DataCoordinate.standardize( 

2833 instrument="Cam1", 

2834 skymap="SkyMap1", 

2835 visit=v, 

2836 tract=t, 

2837 universe=registry.dimensions, 

2838 ) 

2839 for v, t in [(1, 0), (2, 0), (2, 1)] 

2840 }, 

2841 set(query3), 

2842 ) 

2843 self.assertTrue(query3.any(execute=False, exact=False)) 

2844 self.assertTrue(query3.any(execute=True, exact=False)) 

2845 self.assertTrue(query3.any(execute=True, exact=True)) 

2846 self.assertGreaterEqual(query3.count(exact=False), 4) 

2847 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3) 

2848 self.assertFalse(list(query3.explain_no_results())) 

2849 # This query yields overlaps in the database, but all are filtered 

2850 # out in postprocessing. The count queries again aren't very useful. 

2851 # We have to use `where=` here to avoid an optimization that 

2852 # (currently) skips the spatial postprocess-filtering because it 

2853 # recognizes that no spatial join is necessary. That's not ideal, but 

2854 # fixing it is out of scope for this ticket. 

2855 query4 = registry.queryDataIds( 

2856 ["visit", "tract"], 

2857 instrument="Cam1", 

2858 skymap="SkyMap1", 

2859 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2860 ) 

2861 self.assertFalse(set(query4)) 

2862 self.assertTrue(query4.any(execute=False, exact=False)) 

2863 self.assertTrue(query4.any(execute=True, exact=False)) 

2864 self.assertFalse(query4.any(execute=True, exact=True)) 

2865 self.assertGreaterEqual(query4.count(exact=False), 1) 

2866 self.assertEqual(query4.count(exact=True, discard=True), 0) 

2867 messages = query4.explain_no_results() 

2868 self.assertTrue(messages) 

2869 self.assertTrue(any("overlap" in message for message in messages)) 

2870 # This query should yield results from one dataset type but not the 

2871 # other, which is not registered. 

2872 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"]) 

2873 self.assertTrue(set(query5)) 

2874 self.assertTrue(query5.any(execute=False, exact=False)) 

2875 self.assertTrue(query5.any(execute=True, exact=False)) 

2876 self.assertTrue(query5.any(execute=True, exact=True)) 

2877 self.assertGreaterEqual(query5.count(exact=False), 1) 

2878 self.assertGreaterEqual(query5.count(exact=True), 1) 

2879 self.assertFalse(list(query5.explain_no_results())) 

2880 # This query applies a selection that yields no results, fully in the 

2881 # database. Explaining why it fails involves traversing the relation 

2882 # tree and running a LIMIT 1 query at each level that has the potential 

2883 # to remove rows. 

2884 query6 = registry.queryDimensionRecords( 

2885 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1" 

2886 ) 

2887 self.assertEqual(query6.count(exact=True), 0) 

2888 messages = query6.explain_no_results() 

2889 self.assertTrue(messages) 

2890 self.assertTrue(any("no-purpose" in message for message in messages)) 

2891 
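# Illustrative sketch (editorial addition, not part of this file): a
# cheap-to-expensive escalation over the summary methods tested above. The
# helper name is an assumption; `query` is any result object with
# any()/explain_no_results().
def diagnose(query) -> list[str]:
    if query.any(execute=False, exact=False):    # static check, no query execution
        if query.any(execute=True, exact=True):  # full check, incl. postprocessing
            return []  # there really are results; nothing to explain
    return list(query.explain_no_results())  # human-readable reasons it is empty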

2892 def testQueryDataIdsExpressionError(self): 

2893 """Test error checking of 'where' expressions in queryDataIds.""" 

2894 registry = self.makeRegistry() 

2895 self.loadData(registry, "base.yaml") 

2896 bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")} 

2897 with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."): 

2898 registry.queryDataIds(["detector"], where="foo.bar = 12") 

2899 with self.assertRaisesRegex( 

2900 LookupError, "Dimension element name cannot be inferred in this context." 

2901 ): 

2902 registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind) 

2903 

2904 def testQueryDataIdsOrderBy(self): 

2905 """Test order_by and limit on result returned by queryDataIds().""" 

2906 registry = self.makeRegistry() 

2907 self.loadData(registry, "base.yaml") 

2908 self.loadData(registry, "datasets.yaml") 

2909 self.loadData(registry, "spatial.yaml") 

2910 

2911 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

2912 return registry.queryDataIds( 

2913 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

2914 ) 

2915 

2916 Test = namedtuple( 

2917 "testQueryDataIdsOrderByTest", 

2918 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

2919 defaults=(None, None, None), 

2920 ) 

2921 

2922 test_data = ( 

2923 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2924 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

2925 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

2926 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

2927 Test( 

2928 "tract.id,visit.id", 

2929 "tract,visit", 

2930 ((0, 1), (0, 1), (0, 2)), 

2931 limit=(3,), 

2932 ), 

2933 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

2934 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

2935 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

2936 Test( 

2937 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

2938 ), 

2939 Test( 

2940 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

2941 ), 

2942 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2943 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2944 Test( 

2945 "tract,-timespan.begin,timespan.end", 

2946 "tract,visit", 

2947 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

2948 ), 

2949 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

2950 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

2951 Test( 

2952 "tract,detector", 

2953 "tract,detector", 

2954 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2955 datasets="flat", 

2956 collections="imported_r", 

2957 ), 

2958 Test( 

2959 "tract,detector.full_name", 

2960 "tract,detector", 

2961 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2962 datasets="flat", 

2963 collections="imported_r", 

2964 ), 

2965 Test( 

2966 "tract,detector.raft,detector.name_in_raft", 

2967 "tract,detector", 

2968 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2969 datasets="flat", 

2970 collections="imported_r", 

2971 ), 

2972 ) 

2973 

2974 for test in test_data: 

2975 order_by = test.order_by.split(",") 

2976 keys = test.keys.split(",") 

2977 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

2978 if test.limit is not None: 

2979 query = query.limit(*test.limit) 

2980 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

2981 self.assertEqual(dataIds, test.result) 

2982 

2983 # and materialize 

2984 query = do_query(keys).order_by(*order_by) 

2985 if test.limit is not None: 

2986 query = query.limit(*test.limit) 

2987 with self.assertRaises(RelationalAlgebraError): 

2988 with query.materialize(): 

2989 pass 

2990 

2991 # errors in a name 

2992 for order_by in ("", "-"): 

2993 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2994 list(do_query().order_by(order_by)) 

2995 

2996 for order_by in ("undimension.name", "-undimension.name"): 

2997 with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"): 

2998 list(do_query().order_by(order_by)) 

2999 

3000 for order_by in ("attract", "-attract"): 

3001 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

3002 list(do_query().order_by(order_by)) 

3003 

3004 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

3005 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

3006 

3007 with self.assertRaisesRegex( 

3008 ValueError, 

3009 r"Timespan exists in more than one dimension element \(exposure, visit\); " 

3010 r"qualify timespan with specific dimension name\.", 

3011 ): 

3012 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

3013 

3014 with self.assertRaisesRegex( 

3015 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

3016 ): 

3017 list(do_query("tract").order_by("timespan.begin")) 

3018 

3019 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

3020 list(do_query("tract").order_by("tract.timespan.begin")) 

3021 

3022 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

3023 list(do_query("tract").order_by("tract.name")) 

3024 

3025 with self.assertRaisesRegex( 

3026 ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?" 

3027 ): 

3028 list(do_query("visit").order_by("timestamp.begin")) 

3029 
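# Illustrative sketch (editorial addition, not part of this file): the
# order_by/limit grammar exercised above. Keys may be bare dimensions,
# qualified metadata fields, or timespan bounds; a leading "-" sorts
# descending, and limit takes (limit[, offset]). The helper name is an
# assumption.
def latest_visits_per_tract(registry, n: int):
    query = (
        registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
        .order_by("tract", "-visit.timespan.begin")
        .limit(n)
    )
    return [(data_id["tract"], data_id["visit"]) for data_id in query]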

3030 def testQueryDataIdsGovernorExceptions(self): 

3031 """Test exceptions raised by queryDataIds() for incorrect governors.""" 

3032 registry = self.makeRegistry() 

3033 self.loadData(registry, "base.yaml") 

3034 self.loadData(registry, "datasets.yaml") 

3035 self.loadData(registry, "spatial.yaml") 

3036 

3037 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs): 

3038 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs) 

3039 

3040 Test = namedtuple( 

3041 "testQueryDataIdExceptionsTest", 

3042 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"), 

3043 defaults=(None, None, None, {}, None, 0), 

3044 ) 

3045 

3046 test_data = ( 

3047 Test("tract,visit", count=6), 

3048 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

3049 Test( 

3050 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError 

3051 ), 

3052 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

3053 Test( 

3054 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError 

3055 ), 

3056 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6), 

3057 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError), 

3058 Test( 

3059 "tract,visit", 

3060 where="instrument=cam AND skymap=map", 

3061 bind={"cam": "Cam1", "map": "SkyMap1"}, 

3062 count=6, 

3063 ), 

3064 Test( 

3065 "tract,visit", 

3066 where="instrument=cam AND skymap=map", 

3067 bind={"cam": "Cam", "map": "SkyMap"}, 

3068 exception=DataIdValueError, 

3069 ), 

3070 ) 

3071 

3072 for test in test_data: 

3073 dimensions = test.dimensions.split(",") 

3074 if test.exception: 

3075 with self.assertRaises(test.exception): 

3076 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() 

3077 else: 

3078 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3079 self.assertEqual(query.count(discard=True), test.count) 

3080 

3081 # and materialize 

3082 if test.exception: 

3083 with self.assertRaises(test.exception): 

3084 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3085 with query.materialize() as materialized: 

3086 materialized.count(discard=True) 

3087 else: 

3088 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3089 with query.materialize() as materialized: 

3090 self.assertEqual(materialized.count(discard=True), test.count) 

3091 
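# Illustrative sketch (editorial addition, not part of this file): the
# materialize-and-count pattern from the loop above, with governor
# validation. Unknown governor values (instrument, skymap) raise
# DataIdValueError instead of silently returning nothing. The helper name
# and public import path are assumptions.
from lsst.daf.butler.registry import DataIdValueError

def count_data_ids(registry, dimensions, **kwargs) -> int | None:
    try:
        query = registry.queryDataIds(dimensions, **kwargs)
        with query.materialize() as materialized:  # executes once into a temp table
            return materialized.count(discard=True)
    except DataIdValueError:
        return None  # a governor value in the constraint does not exist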

3092 def testQueryDimensionRecordsOrderBy(self): 

3093 """Test order_by and limit on result returned by 

3094 queryDimensionRecords(). 

3095 """ 

3096 registry = self.makeRegistry() 

3097 self.loadData(registry, "base.yaml") 

3098 self.loadData(registry, "datasets.yaml") 

3099 self.loadData(registry, "spatial.yaml") 

3100 

3101 def do_query(element, datasets=None, collections=None): 

3102 return registry.queryDimensionRecords( 

3103 element, instrument="Cam1", datasets=datasets, collections=collections 

3104 ) 

3105 

3106 query = do_query("detector") 

3107 self.assertEqual(len(list(query)), 4) 

3108 

3109 Test = namedtuple( 

3110 "testQueryDataIdsOrderByTest", 

3111 ("element", "order_by", "result", "limit", "datasets", "collections"), 

3112 defaults=(None, None, None), 

3113 ) 

3114 

3115 test_data = ( 

3116 Test("detector", "detector", (1, 2, 3, 4)), 

3117 Test("detector", "-detector", (4, 3, 2, 1)), 

3118 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)), 

3119 Test("detector", "-detector.purpose", (4,), limit=(1,)), 

3120 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)), 

3121 Test("visit", "visit", (1, 2)), 

3122 Test("visit", "-visit.id", (2, 1)), 

3123 Test("visit", "zenith_angle", (1, 2)), 

3124 Test("visit", "-visit.name", (2, 1)), 

3125 Test("visit", "day_obs,-timespan.begin", (2, 1)), 

3126 ) 

3127 

3128 for test in test_data: 

3129 order_by = test.order_by.split(",") 

3130 query = do_query(test.element).order_by(*order_by) 

3131 if test.limit is not None: 

3132 query = query.limit(*test.limit) 

3133 dataIds = tuple(rec.id for rec in query) 

3134 self.assertEqual(dataIds, test.result) 

3135 

3136 # errors in a name 

3137 for order_by in ("", "-"): 

3138 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

3139 list(do_query("detector").order_by(order_by)) 

3140 

3141 for order_by in ("undimension.name", "-undimension.name"): 

3142 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"): 

3143 list(do_query("detector").order_by(order_by)) 

3144 

3145 for order_by in ("attract", "-attract"): 

3146 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."): 

3147 list(do_query("detector").order_by(order_by)) 

3148 

3149 for order_by in ("timestamp.begin", "-timestamp.begin"): 

3150 with self.assertRaisesRegex( 

3151 ValueError, 

3152 r"Element name mismatch: 'timestamp' instead of 'visit'; " 

3153 r"perhaps you meant 'timespan.begin'\?", 

3154 ): 

3155 list(do_query("visit").order_by(order_by)) 

3156 

3157 def testQueryDimensionRecordsExceptions(self): 

3158 """Test exceptions raised by queryDimensionRecords().""" 

3159 registry = self.makeRegistry() 

3160 self.loadData(registry, "base.yaml") 

3161 self.loadData(registry, "datasets.yaml") 

3162 self.loadData(registry, "spatial.yaml") 

3163 

3164 result = registry.queryDimensionRecords("detector") 

3165 self.assertEqual(result.count(), 4) 

3166 result = registry.queryDimensionRecords("detector", instrument="Cam1") 

3167 self.assertEqual(result.count(), 4) 

3168 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"}) 

3169 self.assertEqual(result.count(), 4) 

3170 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'") 

3171 self.assertEqual(result.count(), 4) 

3172 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"}) 

3173 self.assertEqual(result.count(), 4) 

3174 

3175 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3176 result = registry.queryDimensionRecords("detector", instrument="NotCam1") 

3177 result.count() 

3178 

3179 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3180 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) 

3181 result.count() 

3182 

3183 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3184 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") 

3185 result.count() 

3186 

3187 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3188 result = registry.queryDimensionRecords( 

3189 "detector", where="instrument=instr", bind={"instr": "NotCam1"} 

3190 ) 

3191 result.count() 

3192 

3193 def testDatasetConstrainedDimensionRecordQueries(self): 

3194 """Test that queryDimensionRecords works even when given a dataset 

3195 constraint whose dimensions extend beyond the requested dimension 

3196 element's. 

3197 """ 

3198 registry = self.makeRegistry() 

3199 self.loadData(registry, "base.yaml") 

3200 self.loadData(registry, "datasets.yaml") 

3201 # Query for physical_filter dimension records, using a dataset that 

3202 # has both physical_filter and detector dimensions.

3203 records = registry.queryDimensionRecords( 

3204 "physical_filter", 

3205 datasets=["flat"], 

3206 collections="imported_r", 

3207 ) 

3208 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"}) 

3209 # Trying to constrain by all dataset types is an error. 

3210 with self.assertRaises(TypeError): 

3211 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r")) 

3212 

3213 def testSkyPixDatasetQueries(self): 

3214 """Test that we can build queries involving skypix dimensions as long 

3215 as a dataset type that uses those dimensions is included. 

3216 """ 

3217 registry = self.makeRegistry() 

3218 self.loadData(registry, "base.yaml") 

3219 dataset_type = DatasetType( 

3220 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int" 

3221 ) 

3222 registry.registerDatasetType(dataset_type) 

3223 run = "r" 

3224 registry.registerRun(run) 

3225 # First try queries where there are no datasets; the concern is whether 

3226 # we can even build and execute these queries without raising, even 

3227 # when "doomed" query shortcuts are in play. 

3228 self.assertFalse( 

3229 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)) 

3230 ) 

3231 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run))) 

3232 # Now add a dataset and see that we can get it back. 

3233 htm7 = registry.dimensions.skypix["htm"][7].pixelization 

3234 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0]) 

3235 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run) 

3236 self.assertEqual( 

3237 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)), 

3238 {data_id}, 

3239 ) 

3240 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref}) 

3241 
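# Illustrative sketch (editorial addition, not part of this file): how the
# test above obtains a valid htm7 pixel ID. A skypix dimension's
# pixelization reports its full index range via universe(), a RangeSet that
# iterates over (begin, end) pairs. The helper name is an assumption.
def first_skypix_id(registry, system: str = "htm", level: int = 7) -> int:
    pixelization = registry.dimensions.skypix[system][level].pixelization
    begin, _end = next(iter(pixelization.universe()))
    return begin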

3242 def testDatasetIdFactory(self): 

3243 """Simple test for DatasetIdFactory, mostly to catch potential changes 

3244 in its API. 

3245 """ 

3246 registry = self.makeRegistry() 

3247 factory = DatasetIdFactory() 

3248 dataset_type = DatasetType( 

3249 "datasetType", 

3250 dimensions=["detector", "instrument"], 

3251 universe=registry.dimensions, 

3252 storageClass="int", 

3253 ) 

3254 run = "run" 

3255 data_id = DataCoordinate.standardize( 

3256 instrument="Cam1", detector=1, dimensions=dataset_type.dimensions 

3257 ) 

3258 

3259 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE) 

3260 self.assertIsInstance(datasetId, uuid.UUID) 

3261 self.assertEqual(datasetId.version, 4) 

3262 

3263 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE) 

3264 self.assertIsInstance(datasetId, uuid.UUID) 

3265 self.assertEqual(datasetId.version, 5) 

3266 

3267 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN) 

3268 self.assertIsInstance(datasetId, uuid.UUID) 

3269 self.assertEqual(datasetId.version, 5) 

3270 
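# Illustrative sketch (editorial addition, not part of this file): the
# practical difference between the generation modes checked above. UNIQUE
# yields a random version-4 UUID on every call; the DATAID_TYPE* modes are
# name-based version-5 UUIDs, so identical inputs reproduce identical IDs.
# The helper name is an assumption.
def check_id_generation_modes(factory, run, dataset_type, data_id):
    u1 = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
    u2 = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
    assert u1 != u2  # random each call

    d1 = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
    d2 = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
    assert d1 == d2  # deterministic for the same run, dataset type, and data ID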

3271 def testExposureQueries(self): 

3272 """Test query methods using arguments sourced from the exposure log 

3273 service. 

3274 

3275 The most complete test dataset currently available to daf_butler tests

3276 is the hsc-rc2-subset.yaml export (which is unfortunately distinct from

3277 the lsst/rc2_subset GitHub repo), but that does not have 'exposure'

3278 dimension records as it was focused on providing nontrivial spatial 

3279 overlaps between visit+detector and tract+patch. So in this test we 

3280 need to translate queries that originally used the exposure dimension 

3281 to use the (very similar) visit dimension instead. 

3282 """ 

3283 registry = self.makeRegistry() 

3284 self.loadData(registry, "hsc-rc2-subset.yaml") 

3285 self.assertEqual( 

3286 [ 

3287 record.id 

3288 for record in registry.queryDimensionRecords("visit", instrument="HSC") 

3289 .order_by("id") 

3290 .limit(5) 

3291 ], 

3292 [318, 322, 326, 330, 332], 

3293 ) 

3294 self.assertEqual( 

3295 [ 

3296 data_id["visit"] 

3297 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("id").limit(5) 

3298 ], 

3299 [318, 322, 326, 330, 332], 

3300 ) 

3301 self.assertEqual( 

3302 [ 

3303 record.id 

3304 for record in registry.queryDimensionRecords("detector", instrument="HSC") 

3305 .order_by("full_name") 

3306 .limit(5) 

3307 ], 

3308 [73, 72, 71, 70, 65], 

3309 ) 

3310 self.assertEqual( 

3311 [ 

3312 data_id["detector"] 

3313 for data_id in registry.queryDataIds(["detector"], instrument="HSC") 

3314 .order_by("full_name") 

3315 .limit(5) 

3316 ], 

3317 [73, 72, 71, 70, 65], 

3318 ) 

3319 

3320 def test_long_query_names(self) -> None: 

3321 """Test that queries involving very long names are handled correctly. 

3322 

3323 This is especially important for PostgreSQL, which truncates symbols 

3324 longer than 64 chars, but it's worth testing for all DBs. 

3325 """ 

3326 registry = self.makeRegistry() 

3327 name = "abcd" * 17 

3328 registry.registerDatasetType( 

3329 DatasetType( 

3330 name, 

3331 dimensions=(), 

3332 storageClass="Exposure", 

3333 universe=registry.dimensions, 

3334 ) 

3335 ) 

3336 # We need to search more than one collection that actually contains a

3337 # matching dataset, to avoid optimizations that make findFirst=True a

3338 # no-op and thereby sidestep bugs caused by truncation.

3339 run1 = "run1" 

3340 registry.registerRun(run1) 

3341 run2 = "run2" 

3342 registry.registerRun(run2) 

3343 (ref1,) = registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run1) 

3344 registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run2) 

3345 self.assertEqual( 

3346 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)), 

3347 {ref1}, 

3348 ) 

3349 

3350 def test_skypix_constraint_queries(self) -> None: 

3351 """Test queries spatially constrained by a skypix data ID.""" 

3352 registry = self.makeRegistry() 

3353 self.loadData(registry, "hsc-rc2-subset.yaml") 

3354 patch_regions = { 

3355 (data_id["tract"], data_id["patch"]): data_id.region 

3356 for data_id in registry.queryDataIds(["patch"]).expanded() 

3357 } 

3358 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"] 

3359 # This check ensures the test doesn't become trivial due to a config 

3360 # change; if it does, just pick a different HTM level.

3361 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix) 

3362 # Gather all skypix IDs that definitely overlap at least one of these 

3363 # patches. 

3364 relevant_skypix_ids = lsst.sphgeom.RangeSet() 

3365 for patch_region in patch_regions.values(): 

3366 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region) 

3367 # Look for a "nontrivial" skypix_id that overlaps at least one patch 

3368 # and does not overlap at least one other patch. 

3369 for skypix_id in itertools.chain.from_iterable( 

3370 range(begin, end) for begin, end in relevant_skypix_ids 

3371 ): 

3372 skypix_region = skypix_dimension.pixelization.pixel(skypix_id) 

3373 overlapping_patches = { 

3374 patch_key 

3375 for patch_key, patch_region in patch_regions.items() 

3376 if not patch_region.isDisjointFrom(skypix_region) 

3377 } 

3378 if overlapping_patches and overlapping_patches != patch_regions.keys(): 

3379 break 

3380 else: 

3381 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.") 

3382 self.assertEqual( 

3383 { 

3384 (data_id["tract"], data_id["patch"]) 

3385 for data_id in registry.queryDataIds( 

3386 ["patch"], 

3387 dataId={skypix_dimension.name: skypix_id}, 

3388 ) 

3389 }, 

3390 overlapping_patches, 

3391 ) 

3392 # Test that a three-way join that includes the common skypix system in 

3393 # the dimensions doesn't generate redundant join terms in the query. 

3394 full_data_ids = set( 

3395 registry.queryDataIds( 

3396 ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC" 

3397 ).expanded() 

3398 ) 

3399 self.assertGreater(len(full_data_ids), 0) 

3400 for data_id in full_data_ids: 

3401 self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region)) 

3402 self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region)) 

3403 
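# Illustrative sketch (editorial addition, not part of this file): why the
# test gathers candidates with interior(). For an lsst.sphgeom pixelization,
# interior(region) returns only pixels wholly inside the region (guaranteed
# overlaps), while envelope(region) returns every pixel that might intersect
# it, a superset that can include false positives. The helper name is an
# assumption; `pixelization` and `region` are as in the test above.
def guaranteed_and_candidate_pixels(pixelization, region):
    inside = pixelization.interior(region)    # definite overlaps only
    touching = pixelization.envelope(region)  # possible overlaps (superset)
    return inside, touching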

3404 def test_spatial_constraint_queries(self) -> None: 

3405 """Test queries in which one spatial dimension in the constraint (data 

3406 ID or ``where`` string) constrains a different spatial dimension in the 

3407 query result columns. 

3408 """ 

3409 registry = self.makeRegistry() 

3410 self.loadData(registry, "hsc-rc2-subset.yaml") 

3411 patch_regions = { 

3412 (data_id["tract"], data_id["patch"]): data_id.region 

3413 for data_id in registry.queryDataIds(["patch"]).expanded() 

3414 } 

3415 observation_regions = { 

3416 (data_id["visit"], data_id["detector"]): data_id.region 

3417 for data_id in registry.queryDataIds(["visit", "detector"]).expanded() 

3418 } 

3419 all_combos = { 

3420 (patch_key, observation_key) 

3421 for patch_key, observation_key in itertools.product(patch_regions, observation_regions) 

3422 } 

3423 overlapping_combos = { 

3424 (patch_key, observation_key) 

3425 for patch_key, observation_key in all_combos 

3426 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key]) 

3427 } 

3428 # Check a direct spatial join with no constraint first. 

3429 self.assertEqual( 

3430 { 

3431 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"])) 

3432 for data_id in registry.queryDataIds(["patch", "visit", "detector"]) 

3433 }, 

3434 overlapping_combos, 

3435 ) 

3436 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set) 

3437 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set) 

3438 for patch_key, observation_key in overlapping_combos: 

3439 overlaps_by_patch[patch_key].add(observation_key) 

3440 overlaps_by_observation[observation_key].add(patch_key) 

3441 # Find patches and observations that each overlap at least one, but

3442 # not all, of the other kind.

3443 nontrivial_patch = next( 

3444 iter( 

3445 patch_key 

3446 for patch_key, observation_keys in overlaps_by_patch.items() 

3447 if observation_keys and observation_keys != observation_regions.keys() 

3448 ) 

3449 ) 

3450 nontrivial_observation = next( 

3451 iter( 

3452 observation_key 

3453 for observation_key, patch_keys in overlaps_by_observation.items() 

3454 if patch_keys and patch_keys != patch_regions.keys() 

3455 ) 

3456 ) 

3457 # Use the nontrivial patches and observations as constraints on the 

3458 # other dimensions in various ways, first via a 'where' expression. 

3459 # It's better in general to use 'bind' instead of f-strings, but these

3460 # are all integers, so there are no quoting concerns.

3461 self.assertEqual( 

3462 { 

3463 (data_id["visit"], data_id["detector"]) 

3464 for data_id in registry.queryDataIds( 

3465 ["visit", "detector"], 

3466 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}", 

3467 skymap="hsc_rings_v1", 

3468 ) 

3469 }, 

3470 overlaps_by_patch[nontrivial_patch], 

3471 ) 

3472 self.assertEqual( 

3473 { 

3474 (data_id["tract"], data_id["patch"]) 

3475 for data_id in registry.queryDataIds( 

3476 ["patch"], 

3477 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}", 

3478 instrument="HSC", 

3479 ) 

3480 }, 

3481 overlaps_by_observation[nontrivial_observation], 

3482 ) 

3483 # and then via the dataId argument. 

3484 self.assertEqual( 

3485 { 

3486 (data_id["visit"], data_id["detector"]) 

3487 for data_id in registry.queryDataIds( 

3488 ["visit", "detector"], 

3489 dataId={ 

3490 "tract": nontrivial_patch[0], 

3491 "patch": nontrivial_patch[1], 

3492 }, 

3493 skymap="hsc_rings_v1", 

3494 ) 

3495 }, 

3496 overlaps_by_patch[nontrivial_patch], 

3497 ) 

3498 self.assertEqual( 

3499 { 

3500 (data_id["tract"], data_id["patch"]) 

3501 for data_id in registry.queryDataIds( 

3502 ["patch"], 

3503 dataId={ 

3504 "visit": nontrivial_observation[0], 

3505 "detector": nontrivial_observation[1], 

3506 }, 

3507 instrument="HSC", 

3508 ) 

3509 }, 

3510 overlaps_by_observation[nontrivial_observation], 

3511 ) 

3512 

3513 def test_query_projection_drop_postprocessing(self) -> None: 

3514 """Test that projections and deduplications on query objects can 

3515 drop post-query region filtering to ensure the query remains in 

3516 the SQL engine. 

3517 """ 

3518 registry = self.makeRegistry() 

3519 self.loadData(registry, "base.yaml") 

3520 self.loadData(registry, "spatial.yaml") 

3521 

3522 def pop_transfer(tree: Relation) -> Relation: 

3523 """If a relation tree terminates with a transfer to a new engine, 

3524 return the relation prior to that transfer. If not, return the 

3525 original relation. 

3526 """ 

3527 match tree: 

3528 case Transfer(target=target): 

3529 return target 

3530 case _: 

3531 return tree 

3532 

3533 # There's no public way to get a Query object yet, so we get one from a 

3534 # DataCoordinateQueryResults private attribute. When a public API is 

3535 # available this test should use it. 

3536 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query 

3537 # We expect this query to terminate in the iteration engine originally, 

3538 # because region-filtering is necessary. 

3539 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine) 

3540 # If we deduplicate, we usually have to do that downstream of the 

3541 # filtering. That means the deduplication has to happen in the 

3542 # iteration engine. 

3543 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine) 

3544 # If we pass drop_postprocessing, we instead drop the region filtering 

3545 # so the deduplication can happen in SQL (though there might still be 

3546 # transfer to iteration at the tail of the tree that we can ignore; 

3547 # that's what the pop_transfer takes care of here). 

3548 self.assertIsInstance( 

3549 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine, 

3550 sql.Engine, 

3551 ) 

3552 

3553 def test_query_find_datasets_drop_postprocessing(self) -> None: 

3554 """Test that DataCoordinateQueryResults.findDatasets avoids commutator 

3555 problems with the FindFirstDataset relation operation. 

3556 """ 

3557 # Setup: load some visit, tract, and patch records, and insert two 

3558 # datasets with dimensions {visit, patch}, with one in each of two 

3559 # RUN collections. 

3560 registry = self.makeRegistry() 

3561 self.loadData(registry, "base.yaml") 

3562 self.loadData(registry, "spatial.yaml") 

3563 storage_class = StorageClass("Warpy") 

3564 registry.storageClasses.registerStorageClass(storage_class) 

3565 dataset_type = DatasetType( 

3566 "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions 

3567 ) 

3568 registry.registerDatasetType(dataset_type) 

3569 (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1) 

3570 registry.registerRun("run1") 

3571 registry.registerRun("run2") 

3572 (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1") 

3573 (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2") 

3574 # Query for the dataset using queryDataIds(...).findDatasets(...) 

3575 # against only one of the two collections. This should work even 

3576 # though the relation returned by queryDataIds ends with 

3577 # iteration-engine region-filtering, because we can recognize before 

3578 # running the query that there is only one collecton to search and 

3579 # hence the (default) findFirst=True is irrelevant, and joining in the 

3580 # dataset query commutes past the iteration-engine postprocessing. 

3581 query1 = registry.queryDataIds( 

3582 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"] 

3583 ) 

3584 self.assertEqual( 

3585 set(query1.findDatasets(dataset_type.name, collections=["run1"])), 

3586 {ref1}, 

3587 ) 

3588 # Query for the dataset using queryDataIds(...).findDatasets(...) 

3589 # against both collections. This can only work if the FindFirstDataset 

3590 # operation can be commuted past the iteration-engine operations into SQL.

3591 query2 = registry.queryDataIds( 

3592 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"] 

3593 ) 

3594 self.assertEqual( 

3595 set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])), 

3596 {ref2}, 

3597 ) 

3598 

3599 def test_query_empty_collections(self) -> None: 

3600 """Test for registry query methods with empty collections. The methods 

3601 should return an empty result set (or None when applicable) and provide

3602 "doomed" diagnostics. 

3603 """ 

3604 registry = self.makeRegistry() 

3605 self.loadData(registry, "base.yaml") 

3606 self.loadData(registry, "datasets.yaml") 

3607 

3608 # Tests for registry.findDataset() 

3609 with self.assertRaises(NoDefaultCollectionError): 

3610 registry.findDataset("bias", instrument="Cam1", detector=1) 

3611 self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...)) 

3612 self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[])) 

3613 

3614 # Tests for registry.queryDatasets() 

3615 with self.assertRaises(NoDefaultCollectionError): 

3616 registry.queryDatasets("bias") 

3617 self.assertTrue(list(registry.queryDatasets("bias", collections=...))) 

3618 

3619 result = registry.queryDatasets("bias", collections=[]) 

3620 self.assertEqual(len(list(result)), 0) 

3621 messages = list(result.explain_no_results()) 

3622 self.assertTrue(messages) 

3623 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3624 

3625 # Tests for registry.queryDataIds() 

3626 with self.assertRaises(NoDefaultCollectionError): 

3627 registry.queryDataIds("detector", datasets="bias") 

3628 self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...))) 

3629 

3630 result = registry.queryDataIds("detector", datasets="bias", collections=[]) 

3631 self.assertEqual(len(list(result)), 0) 

3632 messages = list(result.explain_no_results()) 

3633 self.assertTrue(messages) 

3634 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3635 

3636 # Tests for registry.queryDimensionRecords() 

3637 with self.assertRaises(NoDefaultCollectionError): 

3638 registry.queryDimensionRecords("detector", datasets="bias") 

3639 self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...))) 

3640 

3641 result = registry.queryDimensionRecords("detector", datasets="bias", collections=[]) 

3642 self.assertEqual(len(list(result)), 0) 

3643 messages = list(result.explain_no_results()) 

3644 self.assertTrue(messages) 

3645 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3646 

3647 def test_dataset_followup_spatial_joins(self) -> None: 

3648 """Test queryDataIds(...).findRelatedDatasets(...) where a spatial join 

3649 is involved. 

3650 """ 

3651 registry = self.makeRegistry() 

3652 self.loadData(registry, "base.yaml") 

3653 self.loadData(registry, "spatial.yaml") 

3654 pvi_dataset_type = DatasetType( 

3655 "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions 

3656 ) 

3657 registry.registerDatasetType(pvi_dataset_type) 

3658 collection = "datasets" 

3659 registry.registerRun(collection) 

3660 (pvi1,) = registry.insertDatasets( 

3661 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 1}], run=collection 

3662 ) 

3663 (pvi2,) = registry.insertDatasets( 

3664 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 2}], run=collection 

3665 ) 

3666 (pvi3,) = registry.insertDatasets( 

3667 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 3}], run=collection 

3668 ) 

3669 self.assertEqual( 

3670 set( 

3671 registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0) 

3672 .expanded() 

3673 .findRelatedDatasets("pvi", [collection]) 

3674 ), 

3675 { 

3676 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi1), 

3677 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi2), 

3678 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=1), pvi2), 

3679 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi1), 

3680 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi2), 

3681 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi3), 

3682 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=3), pvi2), 

3683 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=4), pvi3), 

3684 }, 

3685 )