# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from ... import ddl

__all__ = ["RegistryTests"]

import datetime
import itertools
import os
import re
import time
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Callable, Iterator
from concurrent.futures import ThreadPoolExecutor
from datetime import timedelta
from threading import Barrier

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ..._dataset_association import DatasetAssociation
from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef
from ..._dataset_type import DatasetType
from ..._exceptions import (
    CollectionTypeError,
    DataIdValueError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
)
from ..._exceptions_legacy import DatasetTypeError
from ..._storage_class import StorageClass
from ..._timespan import Timespan
from ...dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension
from .._collection_summary import CollectionSummary
from .._collection_type import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    ConflictingDefinitionError,
    DatasetTypeExpressionError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from .._registry import Registry
from ..interfaces import ButlerAttributeExistsError
from ..sql_registry import SqlRegistry


class RegistryTests(ABC):
    """Generic tests for the `SqlRegistry` class that can be subclassed to
    generate tests for different configurations.
    """


    collectionsManager: str | None = None
    """Name of the collections manager class; if a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: str | dict[str, str] | None = None
    """Name or configuration dictionary of the datasets manager class; if a
    subclass provides a value for this member, it overrides the name
    specified in the default configuration (`str` or `dict`).
    """


    supportsCollectionRegex: bool = True
    """True if the registry class being tested supports regex searches for
    collections."""

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config
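
    # A concrete `makeRegistry` would typically start from this config, e.g.
    # (sketch only; the registry construction details are an assumption and
    # vary by subclass):
    #
    #     config = self.makeRegistryConfig()
    #     config["db"] = "sqlite:///:memory:"
    #     return _RegistryFactory(config).create_from_config()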


    @abstractmethod
    def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry | None:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with`
            is not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()
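
    # `share_repo_with` is what makes the concurrency tests below possible:
    # they need two independent connections to the same underlying database.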


    def loadData(self, registry: SqlRegistry, filename: str) -> None:
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to load into.
        filename : `str`
            The name of the file to load.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)
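            # register() declares the collections and dataset types named in
            # the file; load() then inserts the dimension records and
            # datasets (datastore=None means only registry content is
            # written).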


    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
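        # count() and any() are exercised explicitly because lazy results
        # objects may implement them as separate, cheaper database queries
        # rather than by materializing rows.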


    def testOpaque(self):
        """Tests for `SqlRegistry.registerOpaqueTable`,
        `SqlRegistry.insertOpaqueData`, `SqlRegistry.fetchOpaqueData`, and
        `SqlRegistry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters. SQLite says the limit is 32k, but it looks
        # like it is much higher in practice.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, the second has matching elements in different batches
        # (after sorting).
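        # Only ids 1 and 2 satisfy both constraints (the name list lacks
        # "three"), so rows[0:2] is expected.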

        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `SqlRegistry.registerDatasetType` and
        `SqlRegistry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check a valid insert.
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        differentDimensions = registry.dimensions.conform(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True.
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work...
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # ...except when the definitions are not identical.
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None.
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `SqlRegistry.insertDimensionData`,
        `SqlRegistry.syncDimensionData`, and `SqlRegistry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", dimensions=dimension.minimal_group)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, dimensions=dimension.minimal_group)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # A missing required dependency ("instrument") should fail.
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding the required dependency should fix the failure.
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i", dimensions=dimension2.minimal_group
            )
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("day_obs", {"instrument": "DummyCam", "id": 20250101}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral.
            (
                "visit",
                {
                    "instrument": "DummyCam",
                    "id": 42,
                    "name": "fortytwo",
                    "physical_filter": "d-r",
                    "day_obs": 20250101,
                },
            ),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `SqlRegistry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "day_obs",
            {"instrument": "Cam1", "id": 20250101},
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group1"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 1,
                "obs_id": "one",
                "physical_filter": "Cam1-G",
                "group": "group1",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group2"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 2,
                "obs_id": "two",
                "physical_filter": "Cam1-G",
                "group": "group2",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "day_obs": 20250101},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `SqlRegistry.insertDatasets`,
        `SqlRegistry.getDataset`, and `SqlRegistry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `SqlRegistry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises.
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None.
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, where two have the right
        # dataset type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
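        # certify() associates bias2 into the CALIBRATION collection with the
        # validity range given by `timespan`, so timespan-qualified searches
        # can find it there.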

        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that SqlRegistry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that SqlRegistry.removeDatasetType raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(DatasetTypeError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `SqlRegistry._importDatasets` with UUID dataset IDs."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # The UUID is used without change.
        self.assertEqual(ref.id, ref1.id)

        # All the different failure modes:
        refs = (
            # Importing the same DatasetRef with a different dataset ID is
            # an error.
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId.
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run.
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test the non-unique ID generation modes; such refs can be
        # re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make a dataset ref with a reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
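                # Version-5 UUIDs are name-based (SHA-1), which is what makes
                # the DATAID_TYPE* generation modes reproducible.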

                self.assertEqual(ref1.id, ref.id)

                # Importing it again is OK.
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to a different run with the same ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run.
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run.
                    (ref2,) = registry._importDatasets([ref])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        Components can no longer be found by registry. This test checks
        that this now fails.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Searching for a single component dataset with findDataset fails.
        with self.assertRaises(DatasetTypeError):
            registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time along
        # with a dataset that isn't in the collection and won't cause a
        # conflict. This should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2].
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # The chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempting to set its child collections to something circular
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})

        if self.supportsCollectionRegex:
            # Query for collections matching a regex.
            self.assertCountEqual(
                list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
                ["imported_r", "imported_g"],
            )
            # Query for collections matching a regex or an explicit str.
            self.assertCountEqual(
                list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
                ["imported_r", "imported_g", "chain1"],
            )
        # The same queries as the regex ones above, but using globs instead
        # of regexes.
        self.assertCountEqual(
            list(registry.queryCollections("imported_*", flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a glob or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections(["imported_*", "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )

        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2. That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainCaching(self):
        registry = self.makeRegistry()
        with registry.caching_context():
            registry.registerCollection("a")
            registry.registerCollection("chain", CollectionType.CHAINED)
            # There used to be a caching bug (DM-43750) that would throw an
            # exception if you modified a collection chain for a collection
            # that was already in the cache.
            registry.setCollectionChain("chain", ["a"])
            self.assertEqual(list(registry.getCollectionChain("chain")), ["a"])

    def testCollectionChainFlatten(self):
        """Test that `SqlRegistry.setCollectionChain` obeys its 'flatten'
        option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testCollectionChainPrependConcurrency(self):
        """Verify that locking via database row locks is working as
        expected.
        """

        def blocked_thread_func(registry: SqlRegistry):
            # This call will become blocked after it has decided on positions
            # for the new children in the collection chain, but before
            # inserting them.
            registry._managers.collections.prepend_collection_chain("chain", ["a"])

        def unblocked_thread_func(registry: SqlRegistry):
            registry._managers.collections.prepend_collection_chain("chain", ["b"])

        registry = self._do_collection_concurrency_test(blocked_thread_func, unblocked_thread_func)

        # blocked_thread_func should have finished first, inserting "a".
        # unblocked_thread_func should have finished second, prepending "b".
        self.assertEqual(("b", "a"), registry.getCollectionChain("chain"))

    def testCollectionChainReplaceConcurrency(self):
        """Verify that locking via database row locks is working as
        expected.
        """

        def blocked_thread_func(registry: SqlRegistry):
            # This call will become blocked after deleting children, but
            # before inserting new ones.
            registry.setCollectionChain("chain", ["a"])

        def unblocked_thread_func(registry: SqlRegistry):
            registry.setCollectionChain("chain", ["b"])

        registry = self._do_collection_concurrency_test(blocked_thread_func, unblocked_thread_func)

        # blocked_thread_func should have finished first.
        # unblocked_thread_func should have finished second, overwriting the
        # chain with "b".
        self.assertEqual(("b",), registry.getCollectionChain("chain"))

    def _do_collection_concurrency_test(
        self,
        blocked_thread_func: Callable[[SqlRegistry], None],
        unblocked_thread_func: Callable[[SqlRegistry], None],
    ) -> SqlRegistry:
        # This function:
        # 1. Sets up two registries pointing at the same database.
        # 2. Starts running 'blocked_thread_func' in a background thread,
        #    arranging for it to become blocked during a critical section in
        #    the collections manager.
        # 3. Waits for 'blocked_thread_func' to reach the critical section.
        # 4. Starts running 'unblocked_thread_func'.
        # 5. Allows both functions to run to completion.

        # Set up two registries pointing to the same DB.
        registry1 = self.makeRegistry()
        assert isinstance(registry1, SqlRegistry)
        registry2 = self.makeRegistry(share_repo_with=registry1)
        if registry2 is None:
            # This will happen for in-memory SQL databases.
            raise unittest.SkipTest("Testing concurrency requires two connections to the same DB.")

        registry1.registerCollection("chain", CollectionType.CHAINED)
        for collection in ["a", "b"]:
            registry1.registerCollection(collection)

        # Arrange for registry1 to block during its critical section, allowing
        # us to detect this and control when it becomes unblocked.
        enter_barrier = Barrier(2, timeout=60)
        exit_barrier = Barrier(2, timeout=60)

        def wait_for_barrier():
            enter_barrier.wait()
            exit_barrier.wait()

        registry1._managers.collections._block_for_concurrency_test = wait_for_barrier
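        # `_block_for_concurrency_test` is a test hook invoked inside the
        # manager's critical section; patching it with `wait_for_barrier`
        # pauses that thread until both barriers have been passed.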


        with ThreadPoolExecutor(max_workers=1) as exec1:
            with ThreadPoolExecutor(max_workers=1) as exec2:
                future1 = exec1.submit(blocked_thread_func, registry1)
                enter_barrier.wait()

                # At this point registry1 has entered the critical section and
                # is waiting for us to release it. Start the other thread.
                future2 = exec2.submit(unblocked_thread_func, registry2)
                # thread2 should block inside a database call, but we have no
                # way to detect when it is in this state.
                time.sleep(0.200)

                # Let the threads run to completion.
                exit_barrier.wait()
                future1.result()
                future2.result()

        return registry1

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist.
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, dimensions=dimension.minimal_group))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, dimensions=dimension.minimal_group)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap.
        """
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData("day_obs", dict(instrument="DummyCam", id=20250101))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", day_obs=20250101),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", day_obs=20250101),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", day_obs=20250101),
        )
        registry.insertDimensionData(
            "group",
            dict(instrument="DummyCam", name="ten"),
            dict(instrument="DummyCam", name="eleven"),
            dict(instrument="DummyCam", name="twelve"),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(
                instrument="DummyCam",
                id=100,
                obs_id="100",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=101,
                obs_id="101",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=110,
                obs_id="110",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=111,
                obs_id="111",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=200,
                obs_id="200",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=201,
                obs_id="201",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit=10),
            dict(instrument="DummyCam", exposure=101, visit=10),
            dict(instrument="DummyCam", exposure=110, visit=11),
            dict(instrument="DummyCam", exposure=111, visit=11),
            dict(instrument="DummyCam", exposure=200, visit=20),
            dict(instrument="DummyCam", exposure=201, visit=20),
        )
        # Dataset types and collections.
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
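        # registerCollection defaults to CollectionType.TAGGED, so tagged2
        # is a TAGGED collection.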

1115 storageClass = StorageClass("testDataset") 

1116 registry.storageClasses.registerStorageClass(storageClass) 

1117 rawType = DatasetType( 

1118 name="RAW", 

1119 dimensions=registry.dimensions.conform(("instrument", "exposure", "detector")), 

1120 storageClass=storageClass, 

1121 ) 

1122 registry.registerDatasetType(rawType) 

1123 calexpType = DatasetType( 

1124 name="CALEXP", 

1125 dimensions=registry.dimensions.conform(("instrument", "visit", "detector")), 

1126 storageClass=storageClass, 

1127 ) 

1128 registry.registerDatasetType(calexpType) 

1129 

1130 # add pre-existing datasets 

1131 for exposure in (100, 101, 110, 111): 

1132 for detector in (1, 2, 3): 

1133 # note that only 3 of 5 detectors have datasets 

1134 dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector) 

1135 (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1) 

1136 # exposures 100 and 101 appear in both run1 and tagged2. 

1137 # 100 has different datasets in the different collections 

1138 # 101 has the same dataset in both collections. 

1139 if exposure == 100: 

1140 (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2) 

1141 if exposure in (100, 101): 

1142 registry.associate(tagged2, [ref]) 

1143 # Add pre-existing datasets to tagged2. 

1144 for exposure in (200, 201): 

1145 for detector in (3, 4, 5): 

1146 # note that only 3 of 5 detectors have datasets 

1147 dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector) 

1148 (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2) 

1149 registry.associate(tagged2, [ref]) 

1150 

1151 dimensions = registry.dimensions.conform( 

1152 rawType.dimensions.required.names | calexpType.dimensions.required.names 

1153 ) 

1154 # Test that single dim string works as well as list of str 

1155 rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet() 

1156 rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet() 

1157 self.assertEqual(rows, rowsI) 

1158 # with empty expression 

1159 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet() 

1160 self.assertEqual(len(rows), 4 * 3) # 4 exposures times 3 detectors 

1161 self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111)) 

1162 self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11)) 

1163 self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3)) 

1164 

1165 # second collection 

1166 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet() 

1167 self.assertEqual(len(rows), 4 * 3) # 4 exposures times 3 detectors 

1168 for dataId in rows: 

1169 self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit")) 

1170 self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201)) 

1171 self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20)) 

1172 self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5)) 

1173 

1174 # with two input datasets 

1175 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet() 

1176 self.assertEqual(len(set(rows)), 6 * 3) # 6 exposures times 3 detectors; set needed to de-dupe 

1177 for dataId in rows: 

1178 self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit")) 

1179 self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201)) 

1180 self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20)) 

1181 self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5)) 

1182 

1183 # limit to single visit 

1184 rows = registry.queryDataIds( 

1185 dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam" 

1186 ).toSet() 

1187 self.assertEqual(len(rows), 2 * 3) # 2 exposures times 3 detectors 

1188 self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101)) 

1189 self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,)) 

1190 self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3)) 

1191 

1192 # more limiting expression, using link names instead of Table.column 

1193 rows = registry.queryDataIds( 

1194 dimensions, 

1195 datasets=rawType, 

1196 collections=run1, 

1197 where="visit = 10 and detector > 1 and 'DummyCam'=instrument", 

1198 ).toSet() 

1199 self.assertEqual(len(rows), 2 * 2) # 2 exposures times 2 detectors 

1200 self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101)) 

1201 self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,)) 

1202 self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3)) 

1203 

1204 # queryDataIds with only one of `datasets` and `collections` is an 

1205 # error. 

1206 with self.assertRaises(CollectionError): 

1207 registry.queryDataIds(dimensions, datasets=rawType) 

1208 with self.assertRaises(ArgumentError): 

1209 registry.queryDataIds(dimensions, collections=run1) 

1210 

1211 # expression excludes everything 

1212 rows = registry.queryDataIds( 

1213 dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam" 

1214 ).toSet() 

1215 self.assertEqual(len(rows), 0) 

1216 

1217 # Selecting by physical_filter, this is not in the dimensions, but it 

1218 # is a part of the full expression so it should work too. 

1219 rows = registry.queryDataIds( 

1220 dimensions, 

1221 datasets=rawType, 

1222 collections=run1, 

1223 where="physical_filter = 'dummy_r'", 

1224 instrument="DummyCam", 

1225 ).toSet() 

1226 self.assertEqual(len(rows), 2 * 3) # 2 exposures times 3 detectors 

1227 self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111)) 

1228 self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,)) 

1229 self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3)) 

1230 

1231 def testSkyMapDimensions(self): 

1232 """Tests involving only skymap dimensions, no joins to instrument.""" 

1233 registry = self.makeRegistry() 

1234 

1235 # need a bunch of dimensions and datasets for test, we want 

1236 # "band" in the test so also have to add physical_filter 

1237 # dimensions 

1238 registry.insertDimensionData("instrument", dict(instrument="DummyCam")) 

1239 registry.insertDimensionData( 

1240 "physical_filter", 

1241 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1242 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1243 ) 

1244 registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!")) 

1245 for tract in range(10): 

1246 registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract)) 

1247 registry.insertDimensionData( 

1248 "patch", 

1249 *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)], 

1250 ) 

1251 

1252 # dataset types 

1253 run = "tésτ" 

1254 registry.registerRun(run) 

1255 storageClass = StorageClass("testDataset") 

1256 registry.storageClasses.registerStorageClass(storageClass) 

1257 calexpType = DatasetType( 

1258 name="deepCoadd_calexp", 

1259 dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")), 

1260 storageClass=storageClass, 

1261 ) 

1262 registry.registerDatasetType(calexpType) 

1263 mergeType = DatasetType( 

1264 name="deepCoadd_mergeDet", 

1265 dimensions=registry.dimensions.conform(("skymap", "tract", "patch")), 

1266 storageClass=storageClass, 

1267 ) 

1268 registry.registerDatasetType(mergeType) 

1269 measType = DatasetType( 

1270 name="deepCoadd_meas", 

1271 dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")), 

1272 storageClass=storageClass, 

1273 ) 

1274 registry.registerDatasetType(measType) 

1275 

1276 dimensions = registry.dimensions.conform( 

1277 calexpType.dimensions.required.names 

1278 | mergeType.dimensions.required.names 

1279 | measType.dimensions.required.names 

1280 ) 

1281 

1282 # add pre-existing datasets 

1283 for tract in (1, 3, 5): 

1284 for patch in (2, 4, 6, 7): 

1285 dataId = dict(skymap="DummyMap", tract=tract, patch=patch) 

1286 registry.insertDatasets(mergeType, dataIds=[dataId], run=run) 

1287 for aFilter in ("i", "r"): 

1288 dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter) 

1289 registry.insertDatasets(calexpType, dataIds=[dataId], run=run) 

1290 

1291 # with empty expression 

1292 rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet() 

1293 self.assertEqual(len(rows), 3 * 4 * 2) # 3 tracts x 4 patches x 2 filters

1294 for dataId in rows: 

1295 self.assertCountEqual(dataId.dimensions.required, ("skymap", "tract", "patch", "band")) 

1296 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5)) 

1297 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7)) 

1298 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r")) 

1299 

1300 # limit to 2 tracts and 2 patches 

1301 rows = registry.queryDataIds( 

1302 dimensions, 

1303 datasets=[calexpType, mergeType], 

1304 collections=run, 

1305 where="tract IN (1, 5) AND patch IN (2, 7)", 

1306 skymap="DummyMap", 

1307 ).toSet() 

1308 self.assertEqual(len(rows), 2 * 2 * 2) # 2 tracts x 2 patches x 2 filters 

1309 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5)) 

1310 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7)) 

1311 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r")) 

1312 

1313 # limit to single filter 

1314 rows = registry.queryDataIds( 

1315 dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'" 

1316 ).toSet() 

1317 self.assertEqual(len(rows), 3 * 4 * 1) # 3 tracts x 4 patches x 1 filter

1318 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5)) 

1319 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7)) 

1320 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",)) 

1321 

1322 # Specifying a non-existent skymap raises an exception.

1323 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

1324 rows = registry.queryDataIds( 

1325 dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'" 

1326 ).toSet() 

1327 

1328 def testSpatialJoin(self): 

1329 """Test queries that involve spatial overlap joins.""" 

1330 registry = self.makeRegistry() 

1331 self.loadData(registry, "hsc-rc2-subset.yaml") 

1332 

1333 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of 

1334 # the TopologicalFamily they belong to. We'll relate all elements in 

1335 # each family to all of the elements in each other family. 

1336 families = defaultdict(set) 

1337 # Dictionary of {element.name: {dataId: region}}. 

1338 regions = {} 

1339 for element in registry.dimensions.database_elements: 

1340 if element.spatial is not None: 

1341 families[element.spatial.name].add(element) 

1342 regions[element.name] = { 

1343 record.dataId: record.region for record in registry.queryDimensionRecords(element) 

1344 } 

1345 

1346 # If this check fails, it's not necessarily a problem - it may just be 

1347 # a reasonable change to the default dimension definitions - but the 

1348 # test below depends on there being more than one family to do anything 

1349 # useful. 

1350 self.assertEqual(len(families), 2) 
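
# (With the default dimension universe these two families are expected to
# be the skymap-based one, holding tract and patch, and the
# observation-based one, holding visit and visit_detector_region; the loop
# below does not depend on those names.)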

1351 

1352 # Overlap DatabaseDimensionElements with each other. 

1353 for family1, family2 in itertools.combinations(families, 2): 

1354 for element1, element2 in itertools.product(families[family1], families[family2]): 

1355 dimensions = element1.minimal_group | element2.minimal_group 

1356 # Construct expected set of overlapping data IDs via a 

1357 # brute-force comparison of the regions we've already fetched. 

1358 expected = { 

1359 DataCoordinate.standardize( 

1360 {**dataId1.required, **dataId2.required}, dimensions=dimensions 

1361 ) 

1362 for (dataId1, region1), (dataId2, region2) in itertools.product( 

1363 regions[element1.name].items(), regions[element2.name].items() 

1364 ) 

1365 if not region1.isDisjointFrom(region2) 

1366 } 

1367 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1368 queried = set(registry.queryDataIds(dimensions)) 

1369 self.assertEqual(expected, queried) 

1370 

1371 # Overlap each DatabaseDimensionElement with the commonSkyPix system. 

1372 commonSkyPix = registry.dimensions.commonSkyPix 

1373 for elementName, these_regions in regions.items(): 

1374 dimensions = registry.dimensions[elementName].minimal_group | commonSkyPix.minimal_group 

1375 expected = set() 

1376 for dataId, region in these_regions.items(): 

1377 for begin, end in commonSkyPix.pixelization.envelope(region): 

1378 expected.update( 

1379 DataCoordinate.standardize( 

1380 {commonSkyPix.name: index, **dataId.required}, dimensions=dimensions 

1381 ) 

1382 for index in range(begin, end) 

1383 ) 

1384 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1385 queried = set(registry.queryDataIds(dimensions)) 

1386 self.assertEqual(expected, queried) 

1387 

1388 def testAbstractQuery(self): 

1389 """Test that we can run a query that just lists the known 

1390 bands. This is tricky because band is 

1391 backed by a query against physical_filter. 

1392 """ 

1393 registry = self.makeRegistry() 

1394 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1395 registry.insertDimensionData( 

1396 "physical_filter", 

1397 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1398 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1399 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1400 ) 

1401 rows = registry.queryDataIds(["band"]).toSet() 

1402 self.assertCountEqual( 

1403 rows, 

1404 [ 

1405 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1406 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1407 ], 

1408 ) 

1409 

1410 def testAttributeManager(self): 

1411 """Test basic functionality of attribute manager.""" 

1412 # Number of attributes with schema versions in a fresh database:

1413 # 6 managers with 2 records per manager, plus the dimensions config.

1414 VERSION_COUNT = 6 * 2 + 1 

1415 

1416 registry = self.makeRegistry() 

1417 attributes = registry._managers.attributes 

1418 

1419 # check what get() returns for non-existing key 

1420 self.assertIsNone(attributes.get("attr")) 

1421 self.assertEqual(attributes.get("attr", ""), "") 

1422 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1423 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1424 

1425 # cannot store empty key or value 

1426 with self.assertRaises(ValueError): 

1427 attributes.set("", "value") 

1428 with self.assertRaises(ValueError): 

1429 attributes.set("attr", "") 

1430 

1431 # set value of non-existing key 

1432 attributes.set("attr", "value") 

1433 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1434 self.assertEqual(attributes.get("attr"), "value") 

1435 

1436 # update value of existing key 

1437 with self.assertRaises(ButlerAttributeExistsError): 

1438 attributes.set("attr", "value2") 

1439 

1440 attributes.set("attr", "value2", force=True) 

1441 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1442 self.assertEqual(attributes.get("attr"), "value2") 

1443 

1444 # delete existing key 

1445 self.assertTrue(attributes.delete("attr")) 

1446 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1447 

1448 # delete non-existing key 

1449 self.assertFalse(attributes.delete("non-attr")) 

1450 

1451 # store a bunch of keys and read them back

1452 data = [ 

1453 ("version.core", "1.2.3"), 

1454 ("version.dimensions", "3.2.1"), 

1455 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1456 ] 

1457 for key, value in data: 

1458 attributes.set(key, value) 

1459 items = dict(attributes.items()) 

1460 for key, value in data: 

1461 self.assertEqual(items[key], value) 

1462 

1463 def testQueryDatasetsDeduplication(self): 

1464 """Test that the findFirst option to queryDatasets selects datasets 

1465 from collections in the order given.

1466 """ 

1467 registry = self.makeRegistry() 

1468 self.loadData(registry, "base.yaml") 

1469 self.loadData(registry, "datasets.yaml") 

1470 self.assertCountEqual( 

1471 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1472 [ 

1473 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1474 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1475 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1476 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1477 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1478 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1479 ], 

1480 ) 

1481 self.assertCountEqual( 

1482 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1483 [ 

1484 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1485 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1486 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1487 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1488 ], 

1489 ) 

1490 self.assertCountEqual( 

1491 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1492 [ 

1493 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1494 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1495 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1496 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1497 ], 

1498 ) 

1499 

1500 def testQueryResults(self): 

1501 """Test querying for data IDs and then manipulating the QueryResults 

1502 object returned to perform other queries. 

1503 """ 

1504 registry = self.makeRegistry() 

1505 self.loadData(registry, "base.yaml") 

1506 self.loadData(registry, "datasets.yaml") 

1507 bias = registry.getDatasetType("bias") 

1508 flat = registry.getDatasetType("flat") 

1509 # Obtain expected results from methods other than those we're testing 

1510 # here. That includes: 

1511 # - the dimensions of the data IDs we want to query: 

1512 expected_dimensions = registry.dimensions.conform(["detector", "physical_filter"]) 

1513 # - the dimensions of some other data IDs we'll extract from that: 

1514 expected_subset_dimensions = registry.dimensions.conform(["detector"]) 

1515 # - the data IDs we expect to obtain from the first queries: 

1516 expectedDataIds = DataCoordinateSet( 

1517 { 

1518 DataCoordinate.standardize( 

1519 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1520 ) 

1521 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1522 }, 

1523 dimensions=expected_dimensions, 

1524 hasFull=False, 

1525 hasRecords=False, 

1526 ) 

1527 # - the flat datasets we expect to find from those data IDs, in just 

1528 # one collection (so deduplication is irrelevant): 

1529 expectedFlats = [ 

1530 registry.findDataset( 

1531 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1532 ), 

1533 registry.findDataset( 

1534 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1535 ), 

1536 registry.findDataset( 

1537 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1538 ), 

1539 ] 

1540 # - the data IDs we expect to extract from that: 

1541 expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions) 

1542 # - the bias datasets we expect to find from those data IDs, after we 

1543 # subset-out the physical_filter dimension, both with duplicates: 

1544 expectedAllBiases = [ 

1545 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1546 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1547 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1548 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1549 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1550 ] 

1551 # - ...and without duplicates: 

1552 expectedDeduplicatedBiases = [ 

1553 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1554 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1555 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1556 ] 

1557 # Test against those expected results, using a "lazy" query for the 

1558 # data IDs (which re-executes that query each time we use it to do 

1559 # something new). 

1560 dataIds = registry.queryDataIds( 

1561 ["detector", "physical_filter"], 

1562 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1563 instrument="Cam1", 

1564 ) 

1565 self.assertEqual(dataIds.dimensions, expected_dimensions) 

1566 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1567 self.assertCountEqual( 

1568 list( 

1569 dataIds.findDatasets( 

1570 flat, 

1571 collections=["imported_r"], 

1572 ) 

1573 ), 

1574 expectedFlats, 

1575 ) 

1576 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True) 

1577 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1578 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1579 self.assertCountEqual( 

1580 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1581 expectedAllBiases, 

1582 ) 

1583 self.assertCountEqual( 

1584 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1585 expectedDeduplicatedBiases, 

1586 ) 

1587 

1588 # Searching for a dataset with dimensions we had projected away 

1589 # restores those dimensions. 

1590 self.assertCountEqual( 

1591 list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)), 

1592 expectedFlats, 

1593 ) 

1594 

1595 # Use a dataset type that is not registered, passed both by name and as

1596 # a DatasetType object.

1597 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure") 

1598 

1599 # Test both string name and dataset type object. 

1600 test_type: str | DatasetType 

1601 for test_type, test_type_name in ( 

1602 (unknown_type, unknown_type.name), 

1603 (unknown_type.name, unknown_type.name), 

1604 ): 

1605 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name): 

1606 list( 

1607 subsetDataIds.findDatasets( 

1608 test_type, collections=["imported_r", "imported_g"], findFirst=True 

1609 ) 

1610 ) 

1611 

1612 # Materialize the bias dataset queries (only) by putting the results 

1613 # into temporary tables, then repeat those tests. 

1614 with subsetDataIds.findDatasets( 

1615 bias, collections=["imported_r", "imported_g"], findFirst=False 

1616 ).materialize() as biases: 

1617 self.assertCountEqual(list(biases), expectedAllBiases) 

1618 with subsetDataIds.findDatasets( 

1619 bias, collections=["imported_r", "imported_g"], findFirst=True 

1620 ).materialize() as biases: 

1621 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1622 # Materialize the data ID subset query, but not the dataset queries. 

1623 with subsetDataIds.materialize() as subsetDataIds: 

1624 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1625 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1626 self.assertCountEqual( 

1627 list( 

1628 subsetDataIds.findDatasets( 

1629 bias, collections=["imported_r", "imported_g"], findFirst=False 

1630 ) 

1631 ), 

1632 expectedAllBiases, 

1633 ) 

1634 self.assertCountEqual( 

1635 list( 

1636 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1637 ), 

1638 expectedDeduplicatedBiases, 

1639 ) 

1640 # Materialize the dataset queries, too. 

1641 with subsetDataIds.findDatasets( 

1642 bias, collections=["imported_r", "imported_g"], findFirst=False 

1643 ).materialize() as biases: 

1644 self.assertCountEqual(list(biases), expectedAllBiases) 

1645 with subsetDataIds.findDatasets( 

1646 bias, collections=["imported_r", "imported_g"], findFirst=True 

1647 ).materialize() as biases: 

1648 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1649 # Materialize the original query, but none of the follow-up queries. 

1650 with dataIds.materialize() as dataIds: 

1651 self.assertEqual(dataIds.dimensions, expected_dimensions) 

1652 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1653 self.assertCountEqual( 

1654 list( 

1655 dataIds.findDatasets( 

1656 flat, 

1657 collections=["imported_r"], 

1658 ) 

1659 ), 

1660 expectedFlats, 

1661 ) 

1662 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True) 

1663 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1664 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1665 self.assertCountEqual( 

1666 list( 

1667 subsetDataIds.findDatasets( 

1668 bias, collections=["imported_r", "imported_g"], findFirst=False 

1669 ) 

1670 ), 

1671 expectedAllBiases, 

1672 ) 

1673 self.assertCountEqual( 

1674 list( 

1675 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1676 ), 

1677 expectedDeduplicatedBiases, 

1678 ) 

1679 # Materialize just the bias dataset queries. 

1680 with subsetDataIds.findDatasets( 

1681 bias, collections=["imported_r", "imported_g"], findFirst=False 

1682 ).materialize() as biases: 

1683 self.assertCountEqual(list(biases), expectedAllBiases) 

1684 with subsetDataIds.findDatasets( 

1685 bias, collections=["imported_r", "imported_g"], findFirst=True 

1686 ).materialize() as biases: 

1687 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1688 # Materialize the subset data ID query, but not the dataset 

1689 # queries. 

1690 with subsetDataIds.materialize() as subsetDataIds: 

1691 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1692 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1693 self.assertCountEqual( 

1694 list( 

1695 subsetDataIds.findDatasets( 

1696 bias, collections=["imported_r", "imported_g"], findFirst=False 

1697 ) 

1698 ), 

1699 expectedAllBiases, 

1700 ) 

1701 self.assertCountEqual( 

1702 list( 

1703 subsetDataIds.findDatasets( 

1704 bias, collections=["imported_r", "imported_g"], findFirst=True 

1705 ) 

1706 ), 

1707 expectedDeduplicatedBiases, 

1708 ) 

1709 # Materialize the bias dataset queries, too, so now we're 

1710 # materializing every single step. 

1711 with subsetDataIds.findDatasets( 

1712 bias, collections=["imported_r", "imported_g"], findFirst=False 

1713 ).materialize() as biases: 

1714 self.assertCountEqual(list(biases), expectedAllBiases) 

1715 with subsetDataIds.findDatasets( 

1716 bias, collections=["imported_r", "imported_g"], findFirst=True 

1717 ).materialize() as biases: 

1718 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1719 

1720 def testStorageClassPropagation(self): 

1721 """Test that queries for datasets respect the storage class passed in 

1722 as part of a full dataset type. 

1723 """ 

1724 registry = self.makeRegistry() 

1725 self.loadData(registry, "base.yaml") 

1726 dataset_type_in_registry = DatasetType( 

1727 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions 

1728 ) 

1729 registry.registerDatasetType(dataset_type_in_registry) 

1730 run = "run1" 

1731 registry.registerRun(run) 

1732 (inserted_ref,) = registry.insertDatasets( 

1733 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run 

1734 ) 

1735 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry) 

1736 query_dataset_type = DatasetType( 

1737 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions 

1738 ) 

1739 self.assertNotEqual(dataset_type_in_registry, query_dataset_type) 

1740 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run]) 

1741 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore 

1742 (query_datasets_ref,) = query_datasets_result 

1743 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type) 

1744 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets( 

1745 query_dataset_type, collections=[run] 

1746 ) 

1747 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type) 

1748 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result 

1749 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type) 

1750 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type) 

1751 self.assertEqual(list(query_dataset_types_result), [query_dataset_type]) 

1752 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run]) 

1753 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type) 

1754 

1755 def testEmptyDimensionsQueries(self): 

1756 """Test Query and QueryResults objects in the case where there are no 

1757 dimensions. 

1758 """ 

1759 # Set up test data: one dataset type, two runs, one dataset in each. 

1760 registry = self.makeRegistry() 

1761 self.loadData(registry, "base.yaml") 

1762 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1763 registry.registerDatasetType(schema) 

1764 dataId = DataCoordinate.make_empty(registry.dimensions) 

1765 run1 = "run1" 

1766 run2 = "run2" 

1767 registry.registerRun(run1) 

1768 registry.registerRun(run2) 

1769 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1770 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1771 # Query directly for both datasets together, and then for each one individually.

1772 self.checkQueryResults( 

1773 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1774 ) 

1775 self.checkQueryResults( 

1776 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1777 [dataset1], 

1778 ) 

1779 self.checkQueryResults( 

1780 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1781 [dataset2], 

1782 ) 

1783 # Query for data IDs with no dimensions. 

1784 dataIds = registry.queryDataIds([]) 

1785 self.checkQueryResults(dataIds, [dataId]) 

1786 # Use queried data IDs to find the datasets. 

1787 self.checkQueryResults( 

1788 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1789 [dataset1, dataset2], 

1790 ) 

1791 self.checkQueryResults( 

1792 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1793 [dataset1], 

1794 ) 

1795 self.checkQueryResults( 

1796 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1797 [dataset2], 

1798 ) 

1799 # Now materialize the data ID query results and repeat those tests. 

1800 with dataIds.materialize() as dataIds: 

1801 self.checkQueryResults(dataIds, [dataId]) 

1802 self.checkQueryResults( 

1803 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1804 [dataset1], 

1805 ) 

1806 self.checkQueryResults( 

1807 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1808 [dataset2], 

1809 ) 

1810 # Query for non-empty data IDs, then subset that to get the empty one. 

1811 # Repeat the above tests starting from that. 

1812 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1813 self.checkQueryResults(dataIds, [dataId]) 

1814 self.checkQueryResults( 

1815 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1816 [dataset1, dataset2], 

1817 ) 

1818 self.checkQueryResults( 

1819 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1820 [dataset1], 

1821 ) 

1822 self.checkQueryResults( 

1823 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1824 [dataset2], 

1825 ) 

1826 with dataIds.materialize() as dataIds: 

1827 self.checkQueryResults(dataIds, [dataId]) 

1828 self.checkQueryResults( 

1829 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1830 [dataset1, dataset2], 

1831 ) 

1832 self.checkQueryResults( 

1833 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1834 [dataset1], 

1835 ) 

1836 self.checkQueryResults( 

1837 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1838 [dataset2], 

1839 ) 

1840 # Query for non-empty data IDs, then materialize, then subset to get 

1841 # the empty one. Repeat again. 

1842 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1843 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1844 self.checkQueryResults(dataIds, [dataId]) 

1845 self.checkQueryResults( 

1846 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1847 [dataset1, dataset2], 

1848 ) 

1849 self.checkQueryResults( 

1850 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1851 [dataset1], 

1852 ) 

1853 self.checkQueryResults( 

1854 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1855 [dataset2], 

1856 ) 

1857 with dataIds.materialize() as dataIds: 

1858 self.checkQueryResults(dataIds, [dataId]) 

1859 self.checkQueryResults( 

1860 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1861 [dataset1, dataset2], 

1862 ) 

1863 self.checkQueryResults( 

1864 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1865 [dataset1], 

1866 ) 

1867 self.checkQueryResults( 

1868 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1869 [dataset2], 

1870 ) 

1871 # Repeat the materialization tests with a dimension element that isn't 

1872 # cached, so there's no way we can know when building the query whether

1873 # there are any rows or not (there aren't).

1874 dataIds = registry.queryDataIds(["exposure"]).subset(registry.dimensions.empty, unique=True) 

1875 with dataIds.materialize() as dataIds: 

1876 self.checkQueryResults(dataIds, []) 

1877 self.checkQueryResults( 

1878 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), [] 

1879 ) 

1880 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), []) 

1881 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), []) 

1882 # Query for non-empty data IDs with a constraint on an empty-data-ID 

1883 # dataset that exists. 

1884 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...) 

1885 self.checkQueryResults( 

1886 dataIds.subset(unique=True), 

1887 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)], 

1888 ) 

1889 # Again query for non-empty data IDs with a constraint on empty-data-ID 

1890 # datasets, but when the datasets don't exist. We delete the existing 

1891 # dataset and query just that collection rather than creating a new 

1892 # empty collection because this is a bit less likely for our build-time 

1893 # logic to shortcut-out (via the collection summaries), and such a 

1894 # shortcut would make this test a bit more trivial than we'd like. 

1895 registry.removeDatasets([dataset2]) 

1896 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2) 

1897 self.checkQueryResults(dataIds, []) 

1898 

1899 def testDimensionDataModifications(self): 

1900 """Test that modifying dimension records via: 

1901 syncDimensionData(..., update=True) and 

1902 insertDimensionData(..., replace=True) works as expected, even in the 

1903 presence of datasets using those dimensions and spatial overlap 

1904 relationships. 

1905 """ 

1906 

1907 def _unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1908 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1909 for begin, end in ranges: 

1910 yield from range(begin, end) 

1911 

1912 def _range_set_hull( 

1913 ranges: lsst.sphgeom.RangeSet, 

1914 pixelization: lsst.sphgeom.HtmPixelization, 

1915 ) -> lsst.sphgeom.ConvexPolygon: 

1916 """Create a ConvexPolygon hull of the region defined by a set of 

1917 HTM pixelization index ranges. 

1918 """ 

1919 points = [] 

1920 for index in _unpack_range_set(ranges): 

1921 points.extend(pixelization.triangle(index).getVertices()) 

1922 return lsst.sphgeom.ConvexPolygon(points) 
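
# Illustrative sketch (not executed by the test): a RangeSet stores
# half-open integer index ranges, so something like
#     list(_unpack_range_set(lsst.sphgeom.RangeSet(2, 5)))
# would be expected to yield [2, 3, 4].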

1923 

1924 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1925 # and four child regions (the trixels within the parent at the next 

1926 # level). We'll use the parent as a tract/visit region and the children

1927 # as its patch/visit_detector regions. 

1928 registry = self.makeRegistry() 

1929 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1930 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1931 index = 12288 

1932 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 
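
# Scaling a RangeSet of HTM indices by 4 maps each trixel index i to the
# half-open range [4*i, 4*i + 4), which by the HTM indexing convention is
# exactly the four children of that trixel at the next level; so this holds
# the four level-6 children of level-5 trixel 12288.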

1933 assert htm6.universe().contains(child_ranges_small) 

1934 child_regions_small = [htm6.triangle(i) for i in _unpack_range_set(child_ranges_small)] 

1935 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1936 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1937 ) 

1938 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1939 # Make a larger version of each child region, defined to be the set of 

1940 # htm6 trixels that overlap the original's bounding circle. Make a new 

1941 # parent that's the convex hull of the new children. 

1942 child_regions_large = [ 

1943 _range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1944 ] 

1945 assert all( 

1946 large.contains(small) 

1947 for large, small in zip(child_regions_large, child_regions_small, strict=True) 

1948 ) 

1949 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1950 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1951 ) 

1952 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1953 assert parent_region_large.contains(parent_region_small) 

1954 assert not parent_region_small.contains(parent_region_large) 

1955 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1956 # Find some commonSkyPix indices that overlap the large regions but do

1957 # not overlap the small regions. We use commonSkyPix here to make sure the

1958 # real tests later involve what's in the database, not just post-query 

1959 # filtering of regions. 

1960 child_difference_indices = [] 

1961 for large, small in zip(child_regions_large, child_regions_small, strict=True): 

1962 difference = list(_unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1963 assert difference, "if this is empty, we can't test anything useful with these regions" 

1964 assert all( 

1965 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1966 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1967 for d in difference 

1968 ) 

1969 child_difference_indices.append(difference) 

1970 parent_difference_indices = list( 

1971 _unpack_range_set( 

1972 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1973 ) 

1974 ) 

1975 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1976 assert all( 

1977 ( 

1978 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1979 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1980 ) 

1981 for d in parent_difference_indices 

1982 ) 

1983 # Now that we've finally got those regions, we'll insert the large ones 

1984 # as tract/patch dimension records. 

1985 skymap_name = "testing_v1" 

1986 registry.insertDimensionData( 

1987 "skymap", 

1988 { 

1989 "name": skymap_name, 

1990 "hash": bytes([42]), 

1991 "tract_max": 1, 

1992 "patch_nx_max": 2, 

1993 "patch_ny_max": 2, 

1994 }, 

1995 ) 

1996 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

1997 registry.insertDimensionData( 

1998 "patch", 

1999 *[ 

2000 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

2001 for n, c in enumerate(child_regions_large) 

2002 ], 

2003 ) 

2004 # Add a dataset that uses these dimensions to make sure that modifying

2005 # them doesn't disrupt foreign keys (we need to make sure the database

2006 # doesn't implement insert with replace=True as delete-then-insert).

2007 dataset_type = DatasetType( 

2008 "coadd", 

2009 dimensions=["tract", "patch"], 

2010 universe=registry.dimensions, 

2011 storageClass="Exposure", 

2012 ) 

2013 registry.registerDatasetType(dataset_type) 

2014 registry.registerCollection("the_run", CollectionType.RUN) 

2015 registry.insertDatasets( 

2016 dataset_type, 

2017 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

2018 run="the_run", 

2019 ) 

2020 # Query for tracts and patches that overlap some "difference" commonSkyPix

2021 # pixels; there should be overlaps, because the database has 

2022 # the "large" suite of regions. 

2023 self.assertEqual( 

2024 {0}, 

2025 { 

2026 data_id["tract"] 

2027 for data_id in registry.queryDataIds( 

2028 ["tract"], 

2029 skymap=skymap_name, 

2030 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

2031 ) 

2032 }, 

2033 ) 

2034 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

2035 self.assertIn( 

2036 patch_id, 

2037 { 

2038 data_id["patch"] 

2039 for data_id in registry.queryDataIds( 

2040 ["patch"], 

2041 skymap=skymap_name, 

2042 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

2043 ) 

2044 }, 

2045 ) 

2046 # Use sync to update the tract region and insert to update the regions 

2047 # of the patches, to the "small" suite. 

2048 updated = registry.syncDimensionData( 

2049 "tract", 

2050 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

2051 update=True, 

2052 ) 

2053 self.assertEqual(updated, {"region": parent_region_large}) 

2054 registry.insertDimensionData( 

2055 "patch", 

2056 *[ 

2057 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

2058 for n, c in enumerate(child_regions_small) 

2059 ], 

2060 replace=True, 

2061 ) 

2062 # Query again; there now should be no such overlaps, because the 

2063 # database has the "small" suite of regions. 

2064 self.assertFalse( 

2065 set( 

2066 registry.queryDataIds( 

2067 ["tract"], 

2068 skymap=skymap_name, 

2069 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

2070 ) 

2071 ) 

2072 ) 

2073 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

2074 self.assertNotIn( 

2075 patch_id, 

2076 { 

2077 data_id["patch"] 

2078 for data_id in registry.queryDataIds( 

2079 ["patch"], 

2080 skymap=skymap_name, 

2081 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

2082 ) 

2083 }, 

2084 ) 

2085 # Update back to the large regions and query one more time. 

2086 updated = registry.syncDimensionData( 

2087 "tract", 

2088 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

2089 update=True, 

2090 ) 

2091 self.assertEqual(updated, {"region": parent_region_small}) 

2092 registry.insertDimensionData( 

2093 "patch", 

2094 *[ 

2095 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

2096 for n, c in enumerate(child_regions_large) 

2097 ], 

2098 replace=True, 

2099 ) 

2100 self.assertEqual( 

2101 {0}, 

2102 { 

2103 data_id["tract"] 

2104 for data_id in registry.queryDataIds( 

2105 ["tract"], 

2106 skymap=skymap_name, 

2107 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

2108 ) 

2109 }, 

2110 ) 

2111 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

2112 self.assertIn( 

2113 patch_id, 

2114 { 

2115 data_id["patch"] 

2116 for data_id in registry.queryDataIds( 

2117 ["patch"], 

2118 skymap=skymap_name, 

2119 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

2120 ) 

2121 }, 

2122 ) 

2123 

2124 def testCalibrationCollections(self): 

2125 """Test operations on `~CollectionType.CALIBRATION` collections, 

2126 including `SqlRegistry.certify`, `SqlRegistry.decertify`, 

2127 `SqlRegistry.findDataset`, and 

2128 `DataCoordinateQueryResults.findRelatedDatasets`. 

2129 """ 

2130 # Setup - make a Registry, fill it with some datasets in 

2131 # non-calibration collections. 

2132 registry = self.makeRegistry() 

2133 self.loadData(registry, "base.yaml") 

2134 self.loadData(registry, "datasets.yaml") 

2135 # Set up some timestamps. 

2136 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

2137 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

2138 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

2139 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

2140 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

2141 allTimespans = [ 

2142 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

2143 ] 
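
# Note: None appears at both ends of the input list so that
# itertools.combinations, which never pairs an element with itself, also
# yields (None, None), the fully unbounded timespan; this gives
# C(7, 2) = 21 distinct timespans in all.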

2144 # Insert some exposure records with timespans between each sequential 

2145 # pair of those. 

2146 registry.insertDimensionData( 

2147 "day_obs", {"instrument": "Cam1", "id": 20200101, "timespan": Timespan(t1, t5)} 

2148 ) 

2149 registry.insertDimensionData( 

2150 "group", 

2151 {"instrument": "Cam1", "name": "group0"}, 

2152 {"instrument": "Cam1", "name": "group1"}, 

2153 {"instrument": "Cam1", "name": "group2"}, 

2154 {"instrument": "Cam1", "name": "group3"}, 

2155 ) 

2156 registry.insertDimensionData( 

2157 "exposure", 

2158 { 

2159 "instrument": "Cam1", 

2160 "id": 0, 

2161 "group": "group0", 

2162 "obs_id": "zero", 

2163 "physical_filter": "Cam1-G", 

2164 "day_obs": 20200101, 

2165 "timespan": Timespan(t1, t2), 

2166 }, 

2167 { 

2168 "instrument": "Cam1", 

2169 "id": 1, 

2170 "group": "group1", 

2171 "obs_id": "one", 

2172 "physical_filter": "Cam1-G", 

2173 "day_obs": 20200101, 

2174 "timespan": Timespan(t2, t3), 

2175 }, 

2176 { 

2177 "instrument": "Cam1", 

2178 "id": 2, 

2179 "group": "group2", 

2180 "obs_id": "two", 

2181 "physical_filter": "Cam1-G", 

2182 "day_obs": 20200101, 

2183 "timespan": Timespan(t3, t4), 

2184 }, 

2185 { 

2186 "instrument": "Cam1", 

2187 "id": 3, 

2188 "group": "group3", 

2189 "obs_id": "three", 

2190 "physical_filter": "Cam1-G", 

2191 "day_obs": 20200101, 

2192 "timespan": Timespan(t4, t5), 

2193 }, 

2194 ) 

2195 # Get references to some datasets. 

2196 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

2197 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

2198 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

2199 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

2200 # Register the main calibration collection we'll be working with. 

2201 collection = "Cam1/calibs/default" 

2202 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

2203 # Cannot associate into a calibration collection (no timespan). 

2204 with self.assertRaises(CollectionTypeError): 

2205 registry.associate(collection, [bias2a]) 

2206 # Certify 2a dataset with [t2, t4) validity. 

2207 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 
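
# (Timespans are half-open, [begin, end), with None meaning unbounded on
# that side, so this certification covers t2 <= t < t4.)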

2208 # Test that we can query for this dataset via the new collection, both 

2209 # on its own and with a RUN collection. 

2210 self.assertEqual( 

2211 set(registry.queryDatasets("bias", findFirst=False, collections=collection)), 

2212 {bias2a}, 

2213 ) 

2214 self.assertEqual( 

2215 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])), 

2216 { 

2217 bias2a, 

2218 bias2b, 

2219 bias3b, 

2220 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2221 }, 

2222 ) 

2223 self.assertEqual( 

2224 set(registry.queryDataIds("detector", datasets="bias", collections=collection)), 

2225 {registry.expandDataId(instrument="Cam1", detector=2)}, 

2226 ) 

2227 self.assertEqual( 

2228 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])), 

2229 { 

2230 registry.expandDataId(instrument="Cam1", detector=2), 

2231 registry.expandDataId(instrument="Cam1", detector=3), 

2232 registry.expandDataId(instrument="Cam1", detector=4), 

2233 }, 

2234 ) 

2235 self.assertEqual( 

2236 set( 

2237 registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets( 

2238 "bias", findFirst=True, collections=[collection] 

2239 ) 

2240 ), 

2241 { 

2242 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a), 

2243 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a), 

2244 }, 

2245 ) 

2246 self.assertEqual( 

2247 set( 

2248 registry.queryDataIds( 

2249 ["exposure", "detector"], instrument="Cam1", detector=2 

2250 ).findRelatedDatasets("bias", findFirst=True, collections=[collection, "imported_r"]) 

2251 ), 

2252 { 

2253 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a), 

2254 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a), 

2255 (registry.expandDataId(instrument="Cam1", detector=2, exposure=0), bias2b), 

2256 (registry.expandDataId(instrument="Cam1", detector=2, exposure=3), bias2b), 

2257 }, 

2258 ) 

2259 

2260 # We should not be able to certify 2b with anything overlapping that 

2261 # window. 

2262 with self.assertRaises(ConflictingDefinitionError): 

2263 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

2264 with self.assertRaises(ConflictingDefinitionError): 

2265 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

2266 with self.assertRaises(ConflictingDefinitionError): 

2267 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

2268 with self.assertRaises(ConflictingDefinitionError): 

2269 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

2270 with self.assertRaises(ConflictingDefinitionError): 

2271 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

2272 with self.assertRaises(ConflictingDefinitionError): 

2273 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

2274 with self.assertRaises(ConflictingDefinitionError): 

2275 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

2276 with self.assertRaises(ConflictingDefinitionError): 

2277 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

2278 # We should be able to certify 3a with a range overlapping that window, 

2279 # because it's for a different detector. 

2280 # We'll certify 3a over [t1, t3). 

2281 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

2282 # Now we'll certify 2b and 3b together over [t4, ∞). 

2283 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

2284 

2285 # Fetch all associations and check that they are what we expect. 

2286 self.assertCountEqual( 

2287 list( 

2288 registry.queryDatasetAssociations( 

2289 "bias", 

2290 collections=[collection, "imported_g", "imported_r"], 

2291 ) 

2292 ), 

2293 [ 

2294 DatasetAssociation( 

2295 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

2296 collection="imported_g", 

2297 timespan=None, 

2298 ), 

2299 DatasetAssociation( 

2300 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2301 collection="imported_r", 

2302 timespan=None, 

2303 ), 

2304 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

2305 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

2306 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

2307 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

2308 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

2309 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

2310 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2311 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2312 ], 

2313 ) 

2314 

2315 class Ambiguous: 

2316 """Tag class to denote lookups that should be ambiguous.""" 

2317 

2318 pass 

2319 

2320 def _assertLookup( 

2321 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None 

2322 ) -> None: 

2323 """Local function that asserts that a bias lookup returns the given 

2324 expected result. 

2325 """ 

2326 if expected is Ambiguous: 

2327 with self.assertRaises((DatasetTypeError, LookupError)): 

2328 registry.findDataset( 

2329 "bias", 

2330 collections=collection, 

2331 instrument="Cam1", 

2332 detector=detector, 

2333 timespan=timespan, 

2334 ) 

2335 else: 

2336 self.assertEqual( 

2337 expected, 

2338 registry.findDataset( 

2339 "bias", 

2340 collections=collection, 

2341 instrument="Cam1", 

2342 detector=detector, 

2343 timespan=timespan, 

2344 ), 

2345 ) 
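
# Certified state at this point: bias2a over [t2, t4), bias3a over
# [t1, t3), and bias2b/bias3b over [t4, None). A lookup timespan that
# overlaps certifications of two different datasets for the same detector
# should be Ambiguous, one that overlaps exactly one should find it, and
# one that overlaps none should find nothing.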

2346 

2347 # Systematically test lookups against expected results. 

2348 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2349 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2350 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2351 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2352 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

2353 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2354 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2355 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2356 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2357 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

2358 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2359 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2360 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2361 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

2362 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2363 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

2364 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

2365 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

2366 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

2367 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2368 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2369 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2370 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2371 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2372 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2373 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

2374 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2375 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2376 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2377 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2378 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

2379 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2380 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2381 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2382 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2383 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2384 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2385 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2386 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2387 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2388 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2389 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2390 

2391 # Test lookups via temporal joins to exposures. 

2392 self.assertEqual( 

2393 set( 

2394 registry.queryDataIds( 

2395 ["exposure", "detector"], instrument="Cam1", detector=2 

2396 ).findRelatedDatasets("bias", collections=[collection]) 

2397 ), 

2398 { 

2399 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a), 

2400 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a), 

2401 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b), 

2402 }, 

2403 ) 

2404 self.assertEqual( 

2405 set( 

2406 registry.queryDataIds( 

2407 ["exposure", "detector"], instrument="Cam1", detector=3 

2408 ).findRelatedDatasets("bias", collections=[collection]) 

2409 ), 

2410 { 

2411 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a), 

2412 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a), 

2413 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b), 

2414 }, 

2415 ) 

2416 self.assertEqual( 

2417 set( 

2418 registry.queryDataIds( 

2419 ["exposure", "detector"], instrument="Cam1", detector=2 

2420 ).findRelatedDatasets("bias", collections=[collection, "imported_g"]) 

2421 ), 

2422 { 

2423 (registry.expandDataId(instrument="Cam1", exposure=0, detector=2), bias2a), 

2424 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a), 

2425 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a), 

2426 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b), 

2427 }, 

2428 ) 

2429 self.assertEqual( 

2430 set( 

2431 registry.queryDataIds( 

2432 ["exposure", "detector"], instrument="Cam1", detector=3 

2433 ).findRelatedDatasets("bias", collections=[collection, "imported_g"]) 

2434 ), 

2435 { 

2436 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a), 

2437 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a), 

2438 (registry.expandDataId(instrument="Cam1", exposure=2, detector=3), bias3a), 

2439 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b), 

2440 }, 

2441 ) 

2442 

2443 # Decertify [t3, t5) for all data IDs, and run the test lookups again.

2444 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2445 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2446 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2447 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2448 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2449 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2450 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2451 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2452 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2453 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2454 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2455 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2456 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2457 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2458 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2459 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2460 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2461 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2462 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2463 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2464 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2465 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2466 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2467 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2468 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2469 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2470 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2471 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2472 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2473 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2474 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2475 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2476 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2477 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2478 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2479 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2480 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2481 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2482 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2483 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2484 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2485 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2486 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2487 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2488 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2489 

2490 # Decertify everything, this time with explicit data IDs, then check 

2491 # that no lookups succeed. 

2492 registry.decertify( 

2493 collection, 

2494 "bias", 

2495 Timespan(None, None), 

2496 dataIds=[ 

2497 dict(instrument="Cam1", detector=2), 

2498 dict(instrument="Cam1", detector=3), 

2499 ], 

2500 ) 

2501 for detector in (2, 3): 

2502 for timespan in allTimespans: 

2503 _assertLookup(detector=detector, timespan=timespan, expected=None) 

2504 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2505 # those. 

2506 registry.certify( 

2507 collection, 

2508 [bias2a, bias3a], 

2509 Timespan(None, None), 

2510 ) 

2511 for timespan in allTimespans: 

2512 _assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2513 _assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2514 # Decertify just bias2 over [t2, t4). 

2515 # This should split a single certification row into two (and leave the 

2516 # other existing row, for bias3a, alone). 

2517 registry.decertify( 

2518 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2519 ) 
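
# After this decertify, detector=2 is certified only over (None, t2) and
# [t4, None), with both rows pointing at bias2a:
#
#     ...----t2    [gap]    t4----...
#      bias2a                bias2a
#
# so a lookup timespan overlapping both sides is Ambiguous, one overlapping
# exactly one side finds bias2a, and one inside [t2, t4) finds nothing.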

2520 for timespan in allTimespans: 

2521 _assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2522 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2523 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2524 if overlapsBefore and overlapsAfter: 

2525 expected = Ambiguous 

2526 elif overlapsBefore or overlapsAfter: 

2527 expected = bias2a 

2528 else: 

2529 expected = None 

2530 _assertLookup(detector=2, timespan=timespan, expected=expected) 

2531 
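
# A minimal pure-Python sketch (not the registry implementation) of the
# row-splitting behavior exercised above: decertifying [t2, t4) from a
# single unbounded certification leaves two rows, (-inf, t2) and [t4, inf).
# Timespans are modeled as (begin, end) tuples with None meaning unbounded
# and half-open [begin, end) semantics; the cut bounds are assumed finite.

def _overlaps(row, begin, end):
    row_begin, row_end = row
    return (row_end is None or row_end > begin) and (row_begin is None or row_begin < end)

def decertify_row(row, begin, end):
    """Return the pieces of ``row`` left after removing [begin, end)."""
    if not _overlaps(row, begin, end):
        return [row]
    row_begin, row_end = row
    pieces = []
    if row_begin is None or row_begin < begin:
        pieces.append((row_begin, begin))  # surviving piece before the cut
    if row_end is None or row_end > end:
        pieces.append((end, row_end))  # surviving piece after the cut
    return pieces

assert decertify_row((None, None), 2, 4) == [(None, 2), (4, None)]  # split in two
assert decertify_row((5, 7), 2, 4) == [(5, 7)]  # disjoint row untouched
assert decertify_row((2, 4), 2, 4) == []  # fully decertified
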

2532 def testSkipCalibs(self): 

2533 """Test how queries handle skipping of calibration collections.""" 

2534 registry = self.makeRegistry() 

2535 self.loadData(registry, "base.yaml") 

2536 self.loadData(registry, "datasets.yaml") 

2537 

2538 coll_calib = "Cam1/calibs/default" 

2539 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2540 

2541 # Add all biases to the calibration collection. 

2542 # Without this, the logic that prunes dataset subqueries based on 

2543 # datasetType-collection summary information will fire before the logic 

2544 # we want to test below. This is a good thing (it avoids the dreaded 

2545 # NotImplementedError a bit more often) everywhere but here. 

2546 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2547 

2548 coll_list = [coll_calib, "imported_g", "imported_r"] 

2549 chain = "Cam1/chain" 

2550 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2551 registry.setCollectionChain(chain, coll_list) 

2552 

2553 # explicit list will raise if findFirst=True or there are temporal 

2554 # dimensions 

2555 with self.assertRaises(NotImplementedError): 

2556 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2557 with self.assertRaises(NotImplementedError): 

2558 registry.queryDataIds( 

2559 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2560 ).count() 

2561 

2562 # chain will skip 

2563 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2564 self.assertGreater(len(datasets), 0) 

2565 

2566 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2567 self.assertGreater(len(dataIds), 0) 

2568 

2569 # glob will skip too 

2570 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2571 self.assertGreater(len(datasets), 0) 

2572 

2573 # regular expression will skip too 

2574 pattern = re.compile(".*") 

2575 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2576 self.assertGreater(len(datasets), 0) 

2577 

2578 # ellipsis should work as usual 

2579 datasets = list(registry.queryDatasets("bias", collections=...)) 

2580 self.assertGreater(len(datasets), 0) 

2581 

2582 # A few tests with findFirst. 

2583 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2584 self.assertGreater(len(datasets), 0) 

2585 
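
# A minimal sketch of how calling code might cope with the behavior tested
# above: an explicit collection list containing a CALIBRATION collection
# raises NotImplementedError for findFirst=True, so a caller can fall back
# to a non-find-first query.  ``registry`` and the collection names are
# assumed to be set up as in testSkipCalibs; ``query_biases`` is made up
# for illustration.

def query_biases(registry, collections):
    try:
        return list(registry.queryDatasets("bias", collections=collections, findFirst=True))
    except NotImplementedError:
        # Calibration collections cannot be searched with findFirst=True;
        # fall back to returning all matches (duplicates are possible).
        return list(registry.queryDatasets("bias", collections=collections, findFirst=False))
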

2586 def testIngestTimeQuery(self): 

2587 registry = self.makeRegistry() 

2588 self.loadData(registry, "base.yaml") 

2589 dt0 = datetime.datetime.now(datetime.UTC) 

2590 self.loadData(registry, "datasets.yaml") 

2591 dt1 = datetime.datetime.now(datetime.UTC) 

2592 

2593 datasets = list(registry.queryDatasets(..., collections=...)) 

2594 len0 = len(datasets) 

2595 self.assertGreater(len0, 0) 

2596 

2597 where = "ingest_date > T'2000-01-01'" 

2598 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2599 len1 = len(datasets) 

2600 self.assertEqual(len0, len1) 

2601 

2602 # no one will ever use this piece of software in 30 years 

2603 where = "ingest_date > T'2050-01-01'" 

2604 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2605 len2 = len(datasets) 

2606 self.assertEqual(len2, 0) 

2607 

2608 # Check more exact timing to make sure there is no 37-second offset 

2609 # (after fixing DM-30124). SQLite time precision is 1 second, so make 

2610 # sure that we don't test with higher precision. 

2611 tests = [ 

2612 # format: (timestamp, operator, expected_len) 

2613 (dt0 - timedelta(seconds=1), ">", len0), 

2614 (dt0 - timedelta(seconds=1), "<", 0), 

2615 (dt1 + timedelta(seconds=1), "<", len0), 

2616 (dt1 + timedelta(seconds=1), ">", 0), 

2617 ] 

2618 for dt, op, expect_len in tests: 

2619 dt_str = dt.isoformat(sep=" ") 

2620 

2621 where = f"ingest_date {op} T'{dt_str}'" 

2622 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2623 self.assertEqual(len(datasets), expect_len) 

2624 

2625 # same with bind using datetime or astropy Time 

2626 where = f"ingest_date {op} ingest_time" 

2627 datasets = list( 

2628 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2629 ) 

2630 self.assertEqual(len(datasets), expect_len) 

2631 

2632 dt_astropy = astropy.time.Time(dt, format="datetime") 

2633 datasets = list( 

2634 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2635 ) 

2636 self.assertEqual(len(datasets), expect_len) 

2637 
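
# A minimal sketch of building an ingest_date constraint the way the test
# above does, truncating the cutoff to whole seconds first because SQLite
# only stores second precision.  ``registry`` is assumed to exist as in
# these tests; ``datasets_ingested_after`` is a made-up helper name.

def datasets_ingested_after(registry, cutoff):
    """Query all datasets ingested strictly after ``cutoff``, a
    timezone-aware datetime rounded down to whole-second precision."""
    cutoff = cutoff.replace(microsecond=0)
    return list(
        registry.queryDatasets(
            ..., collections=..., where="ingest_date > cutoff_time", bind={"cutoff_time": cutoff}
        )
    )
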

2638 def testTimespanQueries(self): 

2639 """Test query expressions involving timespans.""" 

2640 registry = self.makeRegistry() 

2641 self.loadData(registry, "hsc-rc2-subset.yaml") 

2642 # All visits in the database; mapping from ID to timespan. 

2643 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2644 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2645 # visit IDs are monotonically increasing). 

2646 ids = sorted(visits.keys()) 

2647 self.assertGreater(len(ids), 20) 

2648 # Pick some quasi-random indexes into `ids` to play with. 

2649 i1 = int(len(ids) * 0.1) 

2650 i2 = int(len(ids) * 0.3) 

2651 i3 = int(len(ids) * 0.6) 

2652 i4 = int(len(ids) * 0.8) 

2653 # Extract some times from those: just before the beginning of i1 (which 

2654 # should be after the end of the previous visit), exactly the 

2655 # beginning of i2, just after the beginning of i3 (and before its end), 

2656 # and the exact end of i4. 

2657 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2658 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2659 t2 = visits[ids[i2]].begin 

2660 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2661 self.assertLess(t3, visits[ids[i3]].end) 

2662 t4 = visits[ids[i4]].end 

2663 # Make sure those are actually in order. 

2664 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2665 

2666 bind = { 

2667 "t1": t1, 

2668 "t2": t2, 

2669 "t3": t3, 

2670 "t4": t4, 

2671 "ts23": Timespan(t2, t3), 

2672 } 

2673 

2674 def query(where): 

2675 """Return results as a sorted, deduplicated list of visit IDs. 

2676 

2677 Parameters 

2678 ---------- 

2679 where : `str` 

2680 The WHERE clause for the query. 

2681 """ 

2682 return sorted( 

2683 { 

2684 dataId["visit"] 

2685 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2686 } 

2687 ) 

2688 

2689 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2690 # where they appear in the expression, and how we get the timespan into 

2691 # the expression. 

2692 

2693 # t1 is before the start of i1, so this should not include i1. 

2694 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2695 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2696 # should not include i2. 

2697 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2698 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2699 # t3 is in the middle of i3, so this should include i3. 

2700 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2701 # This one should not include i3, by the same reasoning. 

2702 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2703 # t4 is exactly at the end of i4, so this should include i4. 

2704 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2705 # i4's upper bound of t4 is exclusive, so this should not include i4. 

2706 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2707 

2708 # Now some timespan vs. time scalar queries. 

2709 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2710 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2711 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2712 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2713 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2714 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2715 

2716 # Empty timespans should not overlap anything. 

2717 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 

2718 
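
# A minimal pure-Python model (not the real Timespan class) of the
# half-open [begin, end) overlap semantics the assertions above rely on,
# with None meaning unbounded.  Two non-empty timespans overlap only if
# each begins before the other ends, so sharing a single endpoint is not
# an overlap, and an empty timespan (begin >= end) overlaps nothing.

def _is_empty(ts):
    begin, end = ts
    return begin is not None and end is not None and begin >= end

def timespans_overlap(a, b):
    if _is_empty(a) or _is_empty(b):
        return False
    (a_begin, a_end), (b_begin, b_end) = a, b
    a_begins_before_b_ends = b_end is None or a_begin is None or a_begin < b_end
    b_begins_before_a_ends = a_end is None or b_begin is None or b_begin < a_end
    return a_begins_before_b_ends and b_begins_before_a_ends

assert not timespans_overlap((1, 2), (2, 3))  # shared endpoint only: no overlap
assert timespans_overlap((1, 3), (2, None))  # unbounded upper bound
assert not timespans_overlap((3, 2), (1, 4))  # empty timespan matches nothing
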

2719 def testCollectionSummaries(self): 

2720 """Test recording and retrieval of collection summaries.""" 

2721 self.maxDiff = None 

2722 registry = self.makeRegistry() 

2723 # Importing datasets from yaml should go through the code path where 

2724 # we update collection summaries as we insert datasets. 

2725 self.loadData(registry, "base.yaml") 

2726 self.loadData(registry, "datasets.yaml") 

2727 flat = registry.getDatasetType("flat") 

2728 expected1 = CollectionSummary() 

2729 expected1.dataset_types.add(registry.getDatasetType("bias")) 

2730 expected1.add_data_ids( 

2731 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)] 

2732 ) 

2733 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2734 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2735 # Create a chained collection with both of the imported runs; the 

2736 # summary should be the same, because it's a union with itself. 

2737 chain = "chain" 

2738 registry.registerCollection(chain, CollectionType.CHAINED) 

2739 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2740 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2741 # Associate flats only into a tagged collection and a calibration 

2742 # collection to check summaries of those. 

2743 tag = "tag" 

2744 registry.registerCollection(tag, CollectionType.TAGGED) 

2745 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2746 calibs = "calibs" 

2747 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2748 registry.certify( 

2749 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2750 ) 

2751 expected2 = expected1.copy() 

2752 expected2.dataset_types.discard("bias") 

2753 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2754 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2755 # Explicitly calling SqlRegistry.refresh() should load those same 

2756 # summaries, via a totally different code path. 

2757 registry.refresh() 

2758 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2759 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2760 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2761 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2762 
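
# A minimal sketch of why the chained collection's summary above equals
# expected1: a CHAINED collection's summary is the union of its children's,
# and a union of identical summaries is unchanged.  Real CollectionSummary
# objects track dataset types and governor dimension values; plain sets
# stand in for both here.

def union_summaries(children):
    dataset_types = set()
    governors = set()
    for child_dataset_types, child_governors in children:
        dataset_types |= child_dataset_types
        governors |= child_governors
    return dataset_types, governors

child = ({"bias", "flat"}, {("instrument", "Cam1")})
assert union_summaries([child, child]) == child  # union with itself is a no-op
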

2763 def testBindInQueryDatasets(self): 

2764 """Test that the bind parameter is correctly forwarded in 

2765 queryDatasets recursion. 

2766 """ 

2767 registry = self.makeRegistry() 

2768 # Load some data so there is something to query; the bind forwarding 

2769 # below is what's actually under test. 

2770 self.loadData(registry, "base.yaml") 

2771 self.loadData(registry, "datasets.yaml") 

2772 self.assertEqual( 

2773 set(registry.queryDatasets("flat", band="r", collections=...)), 

2774 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2775 ) 

2776 

2777 def testQueryIntRangeExpressions(self): 

2778 """Test integer range expressions in ``where`` arguments. 

2779 

2780 Note that our expressions use inclusive stop values, unlike Python's. 

2781 """ 

2782 registry = self.makeRegistry() 

2783 self.loadData(registry, "base.yaml") 

2784 self.assertEqual( 

2785 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")), 

2786 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]}, 

2787 ) 

2788 self.assertEqual( 

2789 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")), 

2790 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]}, 

2791 ) 

2792 self.assertEqual( 

2793 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")), 

2794 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]}, 

2795 ) 

2796 
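
# A minimal sketch of how the inclusive ``start..stop:stride`` range
# expressions above map onto Python's half-open range(): the stop value
# must be bumped by one.  This mirrors the expected detector sets in the
# assertions, not the registry's actual expression parser.

def expand_range_expression(start, stop, stride=1):
    """Values matched by ``start..stop:stride`` (stop is inclusive)."""
    return list(range(start, stop + 1, stride))

assert expand_range_expression(1, 2) == [1, 2]  # detector IN (1..2)
assert expand_range_expression(1, 4, 2) == [1, 3]  # detector IN (1..4:2)
assert expand_range_expression(2, 4, 2) == [2, 4]  # detector IN (2..4:2)
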

2797 def testQueryResultSummaries(self): 

2798 """Test summary methods like `count`, `any`, and `explain_no_results` 

2799 on `DataCoordinateQueryResults` and `DatasetQueryResults`. 

2800 """ 

2801 registry = self.makeRegistry() 

2802 self.loadData(registry, "base.yaml") 

2803 self.loadData(registry, "datasets.yaml") 

2804 self.loadData(registry, "spatial.yaml") 

2805 # Default test dataset has two collections, each with both flats and 

2806 # biases. Add a new collection with only biases. 

2807 registry.registerCollection("biases", CollectionType.TAGGED) 

2808 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2809 # First query yields two results, and involves no postprocessing. 

2810 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2811 self.assertTrue(query1.any(execute=False, exact=False)) 

2812 self.assertTrue(query1.any(execute=True, exact=False)) 

2813 self.assertTrue(query1.any(execute=True, exact=True)) 

2814 self.assertEqual(query1.count(exact=False), 2) 

2815 self.assertEqual(query1.count(exact=True), 2) 

2816 self.assertFalse(list(query1.explain_no_results())) 

2817 # Second query should yield no results, which we should see when 

2818 # we attempt to expand the data ID. 

2819 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2820 # There's no execute=False, exact=False test here because the behavior 

2821 # is not something we want to guarantee in this case (and exact=False 

2822 # says either answer is legal). 

2823 self.assertFalse(query2.any(execute=True, exact=False)) 

2824 self.assertFalse(query2.any(execute=True, exact=True)) 

2825 self.assertEqual(query2.count(exact=False), 0) 

2826 self.assertEqual(query2.count(exact=True), 0) 

2827 self.assertTrue(list(query2.explain_no_results())) 

2828 # These queries yield no results due to various problems that can be 

2829 # spotted prior to execution, yielding helpful diagnostics. 

2830 base_query = registry.queryDataIds(["detector", "physical_filter"]) 

2831 queries_and_snippets = [ 

2832 ( 

2833 # Dataset type name doesn't match any existing dataset types. 

2834 registry.queryDatasets("nonexistent", collections=...), 

2835 ["nonexistent"], 

2836 ), 

2837 ( 

2838 # Dataset type object isn't registered. 

2839 registry.queryDatasets( 

2840 DatasetType( 

2841 "nonexistent", 

2842 dimensions=["instrument"], 

2843 universe=registry.dimensions, 

2844 storageClass="Image", 

2845 ), 

2846 collections=..., 

2847 ), 

2848 ["nonexistent"], 

2849 ), 

2850 ( 

2851 # No datasets of this type in this collection. 

2852 registry.queryDatasets("flat", collections=["biases"]), 

2853 ["flat", "biases"], 

2854 ), 

2855 ( 

2856 # No datasets of this type in this collection. 

2857 base_query.findDatasets("flat", collections=["biases"]), 

2858 ["flat", "biases"], 

2859 ), 

2860 ( 

2861 # No collections matching at all. 

2862 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2863 ["potato"], 

2864 ), 

2865 ] 

2866 with self.assertRaises(MissingDatasetTypeError): 

2867 # Dataset type name doesn't match any existing dataset types. 

2868 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...) 

2869 with self.assertRaises(MissingDatasetTypeError): 

2870 # Dataset type name doesn't match any existing dataset types. 

2871 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...) 

2872 for query, snippets in queries_and_snippets: 

2873 self.assertFalse(query.any(execute=False, exact=False)) 

2874 self.assertFalse(query.any(execute=True, exact=False)) 

2875 self.assertFalse(query.any(execute=True, exact=True)) 

2876 self.assertEqual(query.count(exact=False), 0) 

2877 self.assertEqual(query.count(exact=True), 0) 

2878 messages = list(query.explain_no_results()) 

2879 self.assertTrue(messages) 

2880 # Want all expected snippets to appear in at least one message. 

2881 self.assertTrue( 

2882 any( 

2883 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2884 ), 

2885 messages, 

2886 ) 

2887 

2888 # Wildcards on dataset types are not permitted in queryDataIds. 

2889 with self.assertRaises(DatasetTypeExpressionError): 

2890 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...) 

2891 

2892 # These queries yield no results due to problems that can be identified 

2893 # by cheap follow-up queries, yielding helpful diagnostics. 

2894 for query, snippets in [ 

2895 ( 

2896 # No records for one of the involved dimensions. 

2897 registry.queryDataIds(["subfilter"]), 

2898 ["no rows", "subfilter"], 

2899 ), 

2900 ( 

2901 # No records for one of the involved dimensions. 

2902 registry.queryDimensionRecords("subfilter"), 

2903 ["no rows", "subfilter"], 

2904 ), 

2905 ]: 

2906 self.assertFalse(query.any(execute=True, exact=False)) 

2907 self.assertFalse(query.any(execute=True, exact=True)) 

2908 self.assertEqual(query.count(exact=True), 0) 

2909 messages = list(query.explain_no_results()) 

2910 self.assertTrue(messages) 

2911 # Want all expected snippets to appear in at least one message. 

2912 self.assertTrue( 

2913 any( 

2914 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2915 ), 

2916 messages, 

2917 ) 

2918 

2919 # This query yields four overlaps in the database, but one is filtered 

2920 # out in postprocessing. The count queries aren't accurate because 

2921 # they don't account for duplication that happens due to an internal 

2922 # join against commonSkyPix. 

2923 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2924 self.assertEqual( 

2925 { 

2926 DataCoordinate.standardize( 

2927 instrument="Cam1", 

2928 skymap="SkyMap1", 

2929 visit=v, 

2930 tract=t, 

2931 universe=registry.dimensions, 

2932 ) 

2933 for v, t in [(1, 0), (2, 0), (2, 1)] 

2934 }, 

2935 set(query3), 

2936 ) 

2937 self.assertTrue(query3.any(execute=False, exact=False)) 

2938 self.assertTrue(query3.any(execute=True, exact=False)) 

2939 self.assertTrue(query3.any(execute=True, exact=True)) 

2940 self.assertGreaterEqual(query3.count(exact=False), 4) 

2941 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3) 

2942 self.assertFalse(list(query3.explain_no_results())) 

2943 # This query yields overlaps in the database, but all are filtered 

2944 # out in postprocessing. The count queries again aren't very useful. 

2945 # We have to use `where=` here to avoid an optimization that 

2946 # (currently) skips the spatial postprocess-filtering because it 

2947 # recognizes that no spatial join is necessary. That's not ideal, but 

2948 # fixing it is out of scope for this ticket. 

2949 query4 = registry.queryDataIds( 

2950 ["visit", "tract"], 

2951 instrument="Cam1", 

2952 skymap="SkyMap1", 

2953 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2954 ) 

2955 self.assertFalse(set(query4)) 

2956 self.assertTrue(query4.any(execute=False, exact=False)) 

2957 self.assertTrue(query4.any(execute=True, exact=False)) 

2958 self.assertFalse(query4.any(execute=True, exact=True)) 

2959 self.assertGreaterEqual(query4.count(exact=False), 1) 

2960 self.assertEqual(query4.count(exact=True, discard=True), 0) 

2961 messages = query4.explain_no_results() 

2962 self.assertTrue(messages) 

2963 self.assertTrue(any("overlap" in message for message in messages)) 

2964 # This query should yield results from one dataset type but not the 

2965 # other, which is not registered. 

2966 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"]) 

2967 self.assertTrue(set(query5)) 

2968 self.assertTrue(query5.any(execute=False, exact=False)) 

2969 self.assertTrue(query5.any(execute=True, exact=False)) 

2970 self.assertTrue(query5.any(execute=True, exact=True)) 

2971 self.assertGreaterEqual(query5.count(exact=False), 1) 

2972 self.assertGreaterEqual(query5.count(exact=True), 1) 

2973 self.assertFalse(list(query5.explain_no_results())) 

2974 # This query applies a selection that yields no results, fully in the 

2975 # database. Explaining why it fails involves traversing the relation 

2976 # tree and running a LIMIT 1 query at each level that has the potential 

2977 # to remove rows. 

2978 query6 = registry.queryDimensionRecords( 

2979 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1" 

2980 ) 

2981 self.assertEqual(query6.count(exact=True), 0) 

2982 messages = query6.explain_no_results() 

2983 self.assertTrue(messages) 

2984 self.assertTrue(any("no-purpose" in message for message in messages)) 

2985 
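
# The doomed-query checks above repeat the same assertions several times;
# a helper along these lines (a sketch, not an existing utility) could
# factor them out when adding new cases.

def assert_doomed(test_case, query, snippets):
    """Assert ``query`` yields nothing and explains why, with every string
    in ``snippets`` appearing together in at least one diagnostic message."""
    test_case.assertFalse(query.any(execute=True, exact=True))
    test_case.assertEqual(query.count(exact=True), 0)
    messages = list(query.explain_no_results())
    test_case.assertTrue(messages)
    test_case.assertTrue(
        any(all(snippet in message for snippet in snippets) for message in messages),
        messages,
    )
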

2986 def testQueryDataIdsExpressionError(self): 

2987 """Test error checking of 'where' expressions in queryDataIds.""" 

2988 registry = self.makeRegistry() 

2989 self.loadData(registry, "base.yaml") 

2990 bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")} 

2991 with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."): 

2992 registry.queryDataIds(["detector"], where="foo.bar = 12") 

2993 with self.assertRaisesRegex( 

2994 LookupError, "Dimension element name cannot be inferred in this context." 

2995 ): 

2996 registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind) 

2997 

2998 def testQueryDataIdsOrderBy(self): 

2999 """Test order_by and limit on result returned by queryDataIds().""" 

3000 registry = self.makeRegistry() 

3001 self.loadData(registry, "base.yaml") 

3002 self.loadData(registry, "datasets.yaml") 

3003 self.loadData(registry, "spatial.yaml") 

3004 

3005 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

3006 return registry.queryDataIds( 

3007 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

3008 ) 

3009 

3010 Test = namedtuple( 

3011 "testQueryDataIdsOrderByTest", 

3012 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

3013 defaults=(None, None, None), 

3014 ) 

3015 

3016 test_data = ( 

3017 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

3018 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

3019 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

3020 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

3021 Test( 

3022 "tract.id,visit.id", 

3023 "tract,visit", 

3024 ((0, 1), (0, 1), (0, 2)), 

3025 limit=(3,), 

3026 ), 

3027 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

3028 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

3029 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

3030 Test( 

3031 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

3032 ), 

3033 Test( 

3034 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

3035 ), 

3036 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

3037 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

3038 Test( 

3039 "tract,-visit.timespan.begin,visit.timespan.end", 

3040 "tract,visit", 

3041 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

3042 ), 

3043 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

3044 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

3045 Test( 

3046 "tract,detector", 

3047 "tract,detector", 

3048 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

3049 datasets="flat", 

3050 collections="imported_r", 

3051 ), 

3052 Test( 

3053 "tract,detector.full_name", 

3054 "tract,detector", 

3055 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

3056 datasets="flat", 

3057 collections="imported_r", 

3058 ), 

3059 Test( 

3060 "tract,detector.raft,detector.name_in_raft", 

3061 "tract,detector", 

3062 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

3063 datasets="flat", 

3064 collections="imported_r", 

3065 ), 

3066 ) 

3067 

3068 for test in test_data: 

3069 order_by = test.order_by.split(",") 

3070 keys = test.keys.split(",") 

3071 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

3072 if test.limit is not None: 

3073 query = query.limit(*test.limit) 

3074 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

3075 self.assertEqual(dataIds, test.result) 

3076 

3077 # and materialize 

3078 query = do_query(keys).order_by(*order_by) 

3079 if test.limit is not None: 

3080 query = query.limit(*test.limit) 

3081 with self.assertRaises(RelationalAlgebraError): 

3082 with query.materialize(): 

3083 pass 

3084 

3085 # errors in a name 

3086 for order_by in ("", "-"): 

3087 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

3088 list(do_query().order_by(order_by)) 

3089 

3090 for order_by in ("undimension.name", "-undimension.name"): 

3091 with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"): 

3092 list(do_query().order_by(order_by)) 

3093 

3094 for order_by in ("attract", "-attract"): 

3095 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

3096 list(do_query().order_by(order_by)) 

3097 

3098 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

3099 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

3100 

3101 with self.assertRaisesRegex( 

3102 ValueError, 

3103 r"Timespan exists in more than one dimension element \(day_obs, exposure, visit\); " 

3104 r"qualify timespan with specific dimension name\.", 

3105 ): 

3106 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

3107 

3108 with self.assertRaisesRegex( 

3109 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

3110 ): 

3111 list(do_query("tract").order_by("timespan.begin")) 

3112 

3113 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

3114 list(do_query("tract").order_by("tract.timespan.begin")) 

3115 

3116 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

3117 list(do_query("tract").order_by("tract.name")) 

3118 

3119 with self.assertRaisesRegex( 

3120 ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?" 

3121 ): 

3122 list(do_query("visit").order_by("timestamp.begin")) 

3123 
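
# A minimal pure-Python sketch of the ordering semantics tested above:
# each ORDER BY term is a key name with an optional leading "-" for
# descending order.  Sorting by several mixed-direction keys can be
# emulated with repeated stable sorts, applied from the last key to the
# first; ``sort_data_ids`` is made up for illustration.

def sort_data_ids(rows, order_by):
    """``rows`` are dicts; ``order_by`` is a list like ["tract", "-visit"]."""
    for term in reversed(order_by):
        descending = term.startswith("-")
        key = term.lstrip("-")
        # Python's sort is stable, so earlier keys dominate later ones.
        rows = sorted(rows, key=lambda r: r[key], reverse=descending)
    return rows

rows = [{"tract": 0, "visit": 1}, {"tract": 1, "visit": 2}, {"tract": 0, "visit": 2}]
assert [(r["tract"], r["visit"]) for r in sort_data_ids(rows, ["tract", "-visit"])] == [
    (0, 2), (0, 1), (1, 2)
]
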

3124 def testQueryDataIdsGovernorExceptions(self): 

3125 """Test exceptions raised by queryDataIds() for incorrect governors.""" 

3126 registry = self.makeRegistry() 

3127 self.loadData(registry, "base.yaml") 

3128 self.loadData(registry, "datasets.yaml") 

3129 self.loadData(registry, "spatial.yaml") 

3130 

3131 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs): 

3132 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs) 

3133 

3134 Test = namedtuple( 

3135 "testQueryDataIdExceptionsTest", 

3136 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"), 

3137 defaults=(None, None, None, {}, None, 0), 

3138 ) 

3139 

3140 test_data = ( 

3141 Test("tract,visit", count=6), 

3142 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

3143 Test( 

3144 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError 

3145 ), 

3146 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

3147 Test( 

3148 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError 

3149 ), 

3150 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6), 

3151 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError), 

3152 Test( 

3153 "tract,visit", 

3154 where="instrument=cam AND skymap=map", 

3155 bind={"cam": "Cam1", "map": "SkyMap1"}, 

3156 count=6, 

3157 ), 

3158 Test( 

3159 "tract,visit", 

3160 where="instrument=cam AND skymap=map", 

3161 bind={"cam": "Cam", "map": "SkyMap"}, 

3162 exception=DataIdValueError, 

3163 ), 

3164 ) 

3165 

3166 for test in test_data: 

3167 dimensions = test.dimensions.split(",") 

3168 if test.exception: 

3169 with self.assertRaises(test.exception): 

3170 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() 

3171 else: 

3172 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3173 self.assertEqual(query.count(discard=True), test.count) 

3174 

3175 # and materialize 

3176 if test.exception: 

3177 with self.assertRaises(test.exception): 

3178 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3179 with query.materialize() as materialized: 

3180 materialized.count(discard=True) 

3181 else: 

3182 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3183 with query.materialize() as materialized: 

3184 self.assertEqual(materialized.count(discard=True), test.count) 

3185 

3186 def testQueryDimensionRecordsOrderBy(self): 

3187 """Test order_by and limit on result returned by 

3188 queryDimensionRecords(). 

3189 """ 

3190 registry = self.makeRegistry() 

3191 self.loadData(registry, "base.yaml") 

3192 self.loadData(registry, "datasets.yaml") 

3193 self.loadData(registry, "spatial.yaml") 

3194 

3195 def do_query(element, datasets=None, collections=None): 

3196 return registry.queryDimensionRecords( 

3197 element, instrument="Cam1", datasets=datasets, collections=collections 

3198 ) 

3199 

3200 query = do_query("detector") 

3201 self.assertEqual(len(list(query)), 4) 

3202 

3203 Test = namedtuple( 

3204 "testQueryDataIdsOrderByTest", 

3205 ("element", "order_by", "result", "limit", "datasets", "collections"), 

3206 defaults=(None, None, None), 

3207 ) 

3208 

3209 test_data = ( 

3210 Test("detector", "detector", (1, 2, 3, 4)), 

3211 Test("detector", "-detector", (4, 3, 2, 1)), 

3212 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)), 

3213 Test("detector", "-detector.purpose", (4,), limit=(1,)), 

3214 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)), 

3215 Test("visit", "visit", (1, 2)), 

3216 Test("visit", "-visit.id", (2, 1)), 

3217 Test("visit", "zenith_angle", (1, 2)), 

3218 Test("visit", "-visit.name", (2, 1)), 

3219 Test("visit", "day_obs,-timespan.begin", (2, 1)), 

3220 ) 

3221 

3222 for test in test_data: 

3223 order_by = test.order_by.split(",") 

3224 query = do_query(test.element).order_by(*order_by) 

3225 if test.limit is not None: 

3226 query = query.limit(*test.limit) 

3227 dataIds = tuple(rec.id for rec in query) 

3228 self.assertEqual(dataIds, test.result) 

3229 

3230 # errors in a name 

3231 for order_by in ("", "-"): 

3232 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

3233 list(do_query("detector").order_by(order_by)) 

3234 

3235 for order_by in ("undimension.name", "-undimension.name"): 

3236 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"): 

3237 list(do_query("detector").order_by(order_by)) 

3238 

3239 for order_by in ("attract", "-attract"): 

3240 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."): 

3241 list(do_query("detector").order_by(order_by)) 

3242 

3243 for order_by in ("timestamp.begin", "-timestamp.begin"): 

3244 with self.assertRaisesRegex( 

3245 ValueError, 

3246 r"Element name mismatch: 'timestamp' instead of 'visit'; " 

3247 r"perhaps you meant 'timespan.begin'\?", 

3248 ): 

3249 list(do_query("visit").order_by(order_by)) 

3250 

3251 def testQueryDimensionRecordsExceptions(self): 

3252 """Test exceptions raised by queryDimensionRecords().""" 

3253 registry = self.makeRegistry() 

3254 self.loadData(registry, "base.yaml") 

3255 self.loadData(registry, "datasets.yaml") 

3256 self.loadData(registry, "spatial.yaml") 

3257 

3258 result = registry.queryDimensionRecords("detector") 

3259 self.assertEqual(result.count(), 4) 

3260 result = registry.queryDimensionRecords("detector", instrument="Cam1") 

3261 self.assertEqual(result.count(), 4) 

3262 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"}) 

3263 self.assertEqual(result.count(), 4) 

3264 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'") 

3265 self.assertEqual(result.count(), 4) 

3266 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"}) 

3267 self.assertEqual(result.count(), 4) 

3268 

3269 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3270 result = registry.queryDimensionRecords("detector", instrument="NotCam1") 

3271 result.count() 

3272 

3273 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3274 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) 

3275 result.count() 

3276 

3277 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3278 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") 

3279 result.count() 

3280 

3281 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3282 result = registry.queryDimensionRecords( 

3283 "detector", where="instrument=instr", bind={"instr": "NotCam1"} 

3284 ) 

3285 result.count() 

3286 

3287 def testDatasetConstrainedDimensionRecordQueries(self): 

3288 """Test that queryDimensionRecords works even when given a dataset 

3289 constraint whose dimensions extend beyond the requested dimension 

3290 element's. 

3291 """ 

3292 registry = self.makeRegistry() 

3293 self.loadData(registry, "base.yaml") 

3294 self.loadData(registry, "datasets.yaml") 

3295 # Query for physical_filter dimension records, using a dataset that 

3296 # has both physical_filter and dataset dimensions. 

3297 records = registry.queryDimensionRecords( 

3298 "physical_filter", 

3299 datasets=["flat"], 

3300 collections="imported_r", 

3301 ) 

3302 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"}) 

3303 # Trying to constrain by all dataset types is an error. 

3304 with self.assertRaises(TypeError): 

3305 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r")) 

3306 

3307 def testSkyPixDatasetQueries(self): 

3308 """Test that we can build queries involving skypix dimensions as long 

3309 as a dataset type that uses those dimensions is included. 

3310 """ 

3311 registry = self.makeRegistry() 

3312 self.loadData(registry, "base.yaml") 

3313 dataset_type = DatasetType( 

3314 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int" 

3315 ) 

3316 registry.registerDatasetType(dataset_type) 

3317 run = "r" 

3318 registry.registerRun(run) 

3319 # First try queries where there are no datasets; the concern is whether 

3320 # we can even build and execute these queries without raising, even 

3321 # when "doomed" query shortcuts are in play. 

3322 self.assertFalse( 

3323 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)) 

3324 ) 

3325 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run))) 

3326 # Now add a dataset and see that we can get it back. 

3327 htm7 = registry.dimensions.skypix["htm"][7].pixelization 

3328 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0]) 

3329 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run) 

3330 self.assertEqual( 

3331 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)), 

3332 {data_id}, 

3333 ) 

3334 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref}) 

3335 

3336 def testDatasetIdFactory(self): 

3337 """Simple test for DatasetIdFactory, mostly to catch potential changes 

3338 in its API. 

3339 """ 

3340 registry = self.makeRegistry() 

3341 factory = DatasetIdFactory() 

3342 dataset_type = DatasetType( 

3343 "datasetType", 

3344 dimensions=["detector", "instrument"], 

3345 universe=registry.dimensions, 

3346 storageClass="int", 

3347 ) 

3348 run = "run" 

3349 data_id = DataCoordinate.standardize( 

3350 instrument="Cam1", detector=1, dimensions=dataset_type.dimensions 

3351 ) 

3352 

3353 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE) 

3354 self.assertIsInstance(datasetId, uuid.UUID) 

3355 self.assertEqual(datasetId.version, 4) 

3356 

3357 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE) 

3358 self.assertIsInstance(datasetId, uuid.UUID) 

3359 self.assertEqual(datasetId.version, 5) 

3360 

3361 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN) 

3362 self.assertIsInstance(datasetId, uuid.UUID) 

3363 self.assertEqual(datasetId.version, 5) 

3364 
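
# A minimal sketch (standard-library uuid only) of the distinction the
# assertions above rely on: UNIQUE ids are random UUID4s, while the
# DATAID_TYPE* modes produce deterministic, name-based UUID5s, so the same
# inputs always map to the same id.  The namespace and the exact payload
# string are internal details of DatasetIdFactory; the ones below are made
# up for illustration.

import uuid

EXAMPLE_NAMESPACE = uuid.UUID("00000000-0000-0000-0000-000000000000")  # hypothetical

def deterministic_dataset_id(run, dataset_type_name, data_id_items):
    # Serialize the inputs into a stable string and hash it into a UUID5.
    payload = ";".join([run, dataset_type_name] + [f"{k}={v}" for k, v in sorted(data_id_items)])
    return uuid.uuid5(EXAMPLE_NAMESPACE, payload)

a = deterministic_dataset_id("run", "datasetType", [("instrument", "Cam1"), ("detector", 1)])
b = deterministic_dataset_id("run", "datasetType", [("instrument", "Cam1"), ("detector", 1)])
assert a == b and a.version == 5  # deterministic, name-based
assert uuid.uuid4().version == 4  # random, like DatasetIdGenEnum.UNIQUE
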

3365 def testExposureQueries(self): 

3366 """Test query methods using arguments sourced from the exposure log 

3367 service. 

3368 

3369 The most complete test dataset currently available to daf_butler tests 

3370 is the hsc-rc2-subset.yaml export (which is unfortunately distinct from 

3371 the lsst/rc2_subset GitHub repo), but that does not have 'exposure' 

3372 dimension records as it was focused on providing nontrivial spatial 

3373 overlaps between visit+detector and tract+patch. So in this test we 

3374 need to translate queries that originally used the exposure dimension 

3375 to use the (very similar) visit dimension instead. 

3376 """ 

3377 registry = self.makeRegistry() 

3378 self.loadData(registry, "hsc-rc2-subset.yaml") 

3379 self.assertEqual( 

3380 [ 

3381 record.id 

3382 for record in registry.queryDimensionRecords("visit", instrument="HSC") 

3383 .order_by("id") 

3384 .limit(5) 

3385 ], 

3386 [318, 322, 326, 330, 332], 

3387 ) 

3388 self.assertEqual( 

3389 [ 

3390 data_id["visit"] 

3391 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("visit").limit(5) 

3392 ], 

3393 [318, 322, 326, 330, 332], 

3394 ) 

3395 self.assertEqual( 

3396 [ 

3397 record.id 

3398 for record in registry.queryDimensionRecords("detector", instrument="HSC") 

3399 .order_by("full_name") 

3400 .limit(5) 

3401 ], 

3402 [73, 72, 71, 70, 65], 

3403 ) 

3404 self.assertEqual( 

3405 [ 

3406 data_id["detector"] 

3407 for data_id in registry.queryDataIds(["detector"], instrument="HSC") 

3408 .order_by("full_name") 

3409 .limit(5) 

3410 ], 

3411 [73, 72, 71, 70, 65], 

3412 ) 

3413 

3414 def test_long_query_names(self) -> None: 

3415 """Test that queries involving very long names are handled correctly. 

3416 

3417 This is especially important for PostgreSQL, which truncates symbols 

3418 longer than 64 chars, but it's worth testing for all DBs. 

3419 """ 

3420 registry = self.makeRegistry() 

3421 name = "abcd" * 17 

3422 registry.registerDatasetType( 

3423 DatasetType( 

3424 name, 

3425 dimensions=(), 

3426 storageClass="Exposure", 

3427 universe=registry.dimensions, 

3428 ) 

3429 ) 

3430 # We need to search more than one collection that actually contains a 

3431 # matching dataset; otherwise an optimization makes findFirst=True a 

3432 # no-op and would sidestep any bugs due to truncation. 

3433 run1 = "run1" 

3434 registry.registerRun(run1) 

3435 run2 = "run2" 

3436 registry.registerRun(run2) 

3437 (ref1,) = registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run1) 

3438 registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run2) 

3439 self.assertEqual( 

3440 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)), 

3441 {ref1}, 

3442 ) 

3443 
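
# Why "abcd" * 17: PostgreSQL truncates identifiers to NAMEDATALEN - 1
# characters (63 by default), and 4 * 17 = 68 comfortably exceeds that,
# so any SQL symbol derived from this dataset type name gets truncated.

name = "abcd" * 17
assert len(name) == 68
assert len(name) > 63  # PostgreSQL's default identifier limit
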

3444 def test_skypix_constraint_queries(self) -> None: 

3445 """Test queries spatially constrained by a skypix data ID.""" 

3446 registry = self.makeRegistry() 

3447 self.loadData(registry, "hsc-rc2-subset.yaml") 

3448 patch_regions = { 

3449 (data_id["tract"], data_id["patch"]): data_id.region 

3450 for data_id in registry.queryDataIds(["patch"]).expanded() 

3451 } 

3452 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"] 

3453 # This check ensures the test doesn't become trivial due to a config 

3454 # change; if it does, just pick a different HTM level. 

3455 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix) 

3456 # Gather all skypix IDs that definitely overlap at least one of these 

3457 # patches. 

3458 relevant_skypix_ids = lsst.sphgeom.RangeSet() 

3459 for patch_region in patch_regions.values(): 

3460 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region) 

3461 # Look for a "nontrivial" skypix_id that overlaps at least one patch 

3462 # and does not overlap at least one other patch. 

3463 for skypix_id in itertools.chain.from_iterable( 

3464 range(begin, end) for begin, end in relevant_skypix_ids 

3465 ): 

3466 skypix_region = skypix_dimension.pixelization.pixel(skypix_id) 

3467 overlapping_patches = { 

3468 patch_key 

3469 for patch_key, patch_region in patch_regions.items() 

3470 if not patch_region.isDisjointFrom(skypix_region) 

3471 } 

3472 if overlapping_patches and overlapping_patches != patch_regions.keys(): 

3473 break 

3474 else: 

3475 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.") 

3476 self.assertEqual( 

3477 { 

3478 (data_id["tract"], data_id["patch"]) 

3479 for data_id in registry.queryDataIds( 

3480 ["patch"], 

3481 dataId={skypix_dimension.name: skypix_id}, 

3482 ) 

3483 }, 

3484 overlapping_patches, 

3485 ) 

3486 # Test that a three-way join that includes the common skypix system in 

3487 # the dimensions doesn't generate redundant join terms in the query. 

3488 full_data_ids = set( 

3489 registry.queryDataIds( 

3490 ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC" 

3491 ).expanded() 

3492 ) 

3493 self.assertGreater(len(full_data_ids), 0) 

3494 for data_id in full_data_ids: 

3495 self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region)) 

3496 self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region)) 

3497 

3498 def test_spatial_constraint_queries(self) -> None: 

3499 """Test queries in which one spatial dimension in the constraint (data 

3500 ID or ``where`` string) constrains a different spatial dimension in the 

3501 query result columns. 

3502 """ 

3503 registry = self.makeRegistry() 

3504 self.loadData(registry, "hsc-rc2-subset.yaml") 

3505 patch_regions = { 

3506 (data_id["tract"], data_id["patch"]): data_id.region 

3507 for data_id in registry.queryDataIds(["patch"]).expanded() 

3508 } 

3509 observation_regions = { 

3510 (data_id["visit"], data_id["detector"]): data_id.region 

3511 for data_id in registry.queryDataIds(["visit", "detector"]).expanded() 

3512 } 

3513 all_combos = { 

3514 (patch_key, observation_key) 

3515 for patch_key, observation_key in itertools.product(patch_regions, observation_regions) 

3516 } 

3517 overlapping_combos = { 

3518 (patch_key, observation_key) 

3519 for patch_key, observation_key in all_combos 

3520 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key]) 

3521 } 

3522 # Check a direct spatial join with no constraint first. 

3523 self.assertEqual( 

3524 { 

3525 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"])) 

3526 for data_id in registry.queryDataIds(["patch", "visit", "detector"]) 

3527 }, 

3528 overlapping_combos, 

3529 ) 

3530 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set) 

3531 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set) 

3532 for patch_key, observation_key in overlapping_combos: 

3533 overlaps_by_patch[patch_key].add(observation_key) 

3534 overlaps_by_observation[observation_key].add(patch_key) 

3535 # Find patches and observations that overlap at least one, but not 

3536 # all, of the other kind. 

3537 nontrivial_patch = next( 

3538 iter( 

3539 patch_key 

3540 for patch_key, observation_keys in overlaps_by_patch.items() 

3541 if observation_keys and observation_keys != observation_regions.keys() 

3542 ) 

3543 ) 

3544 nontrivial_observation = next( 

3545 iter( 

3546 observation_key 

3547 for observation_key, patch_keys in overlaps_by_observation.items() 

3548 if patch_keys and patch_keys != patch_regions.keys() 

3549 ) 

3550 ) 

3551 # Use the nontrivial patches and observations as constraints on the 

3552 # other dimensions in various ways, first via a 'where' expression. 

3553 # It's better in general to use 'bind' instead of f-strings, but these 

3554 # are all integers, so there are no quoting concerns. 

3555 self.assertEqual( 

3556 { 

3557 (data_id["visit"], data_id["detector"]) 

3558 for data_id in registry.queryDataIds( 

3559 ["visit", "detector"], 

3560 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}", 

3561 skymap="hsc_rings_v1", 

3562 ) 

3563 }, 

3564 overlaps_by_patch[nontrivial_patch], 

3565 ) 

3566 self.assertEqual( 

3567 { 

3568 (data_id["tract"], data_id["patch"]) 

3569 for data_id in registry.queryDataIds( 

3570 ["patch"], 

3571 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}", 

3572 instrument="HSC", 

3573 ) 

3574 }, 

3575 overlaps_by_observation[nontrivial_observation], 

3576 ) 

3577 # and then via the dataId argument. 

3578 self.assertEqual( 

3579 { 

3580 (data_id["visit"], data_id["detector"]) 

3581 for data_id in registry.queryDataIds( 

3582 ["visit", "detector"], 

3583 dataId={ 

3584 "tract": nontrivial_patch[0], 

3585 "patch": nontrivial_patch[1], 

3586 }, 

3587 skymap="hsc_rings_v1", 

3588 ) 

3589 }, 

3590 overlaps_by_patch[nontrivial_patch], 

3591 ) 

3592 self.assertEqual( 

3593 { 

3594 (data_id["tract"], data_id["patch"]) 

3595 for data_id in registry.queryDataIds( 

3596 ["patch"], 

3597 dataId={ 

3598 "visit": nontrivial_observation[0], 

3599 "detector": nontrivial_observation[1], 

3600 }, 

3601 instrument="HSC", 

3602 ) 

3603 }, 

3604 overlaps_by_observation[nontrivial_observation], 

3605 ) 

3606 

3607 def test_query_projection_drop_postprocessing(self) -> None: 

3608 """Test that projections and deduplications on query objects can 

3609 drop post-query region filtering to ensure the query remains in 

3610 the SQL engine. 

3611 """ 

3612 registry = self.makeRegistry() 

3613 self.loadData(registry, "base.yaml") 

3614 self.loadData(registry, "spatial.yaml") 

3615 

3616 def pop_transfer(tree: Relation) -> Relation: 

3617 """If a relation tree terminates with a transfer to a new engine, 

3618 return the relation prior to that transfer. If not, return the 

3619 original relation. 

3620 

3621 Parameters 

3622 ---------- 

3623 tree : `Relation` 

3624 The relation tree to modify. 

3625 """ 

3626 match tree: 

3627 case Transfer(target=target): 

3628 return target 

3629 case _: 

3630 return tree 

3631 

3632 # There's no public way to get a Query object yet, so we get one from a 

3633 # DataCoordinateQueryResults private attribute. When a public API is 

3634 # available this test should use it. 

3635 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query 

3636 # We expect this query to terminate in the iteration engine originally, 

3637 # because region-filtering is necessary. 

3638 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine) 

3639 # If we deduplicate, we usually have to do that downstream of the 

3640 # filtering. That means the deduplication has to happen in the 

3641 # iteration engine. 

3642 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine) 

3643 # If we pass drop_postprocessing, we instead drop the region filtering 

3644 # so the deduplication can happen in SQL (though there might still be 

3645 # transfer to iteration at the tail of the tree that we can ignore; 

3646 # that's what the pop_transfer takes care of here). 

3647 self.assertIsInstance( 

3648 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine, 

3649 sql.Engine, 

3650 ) 

3651 

3652 def test_query_find_datasets_drop_postprocessing(self) -> None: 

3653 """Test that DataCoordinateQueryResults.findDatasets avoids commutator 

3654 problems with the FindFirstDataset relation operation. 

3655 """ 

3656 # Setup: load some visit, tract, and patch records, and insert two 

3657 # datasets with dimensions {visit, patch}, with one in each of two 

3658 # RUN collections. 

3659 registry = self.makeRegistry() 

3660 self.loadData(registry, "base.yaml") 

3661 self.loadData(registry, "spatial.yaml") 

3662 storage_class = StorageClass("Warpy") 

3663 registry.storageClasses.registerStorageClass(storage_class) 

3664 dataset_type = DatasetType( 

3665 "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions 

3666 ) 

3667 registry.registerDatasetType(dataset_type) 

3668 (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1) 

3669 registry.registerRun("run1") 

3670 registry.registerRun("run2") 

3671 (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1") 

3672 (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2") 

3673 # Query for the dataset using queryDataIds(...).findDatasets(...) 

3674 # against only one of the two collections. This should work even 

3675 # though the relation returned by queryDataIds ends with 

3676 # iteration-engine region-filtering, because we can recognize before 

3677 # running the query that there is only one collecton to search and 

3678 # hence the (default) findFirst=True is irrelevant, and joining in the 

3679 # dataset query commutes past the iteration-engine postprocessing. 

3680 query1 = registry.queryDataIds( 

3681 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"] 

3682 ) 

3683 self.assertEqual( 

3684 set(query1.findDatasets(dataset_type.name, collections=["run1"])), 

3685 {ref1}, 

3686 ) 

3687 # Query for the dataset using queryDataIds(...).findDatasets(...) 

3688 # against both collections. This can only work if the FindFirstDataset 

3689 # operation can be commuted past the iteration-engine postprocessing into SQL. 

3690 query2 = registry.queryDataIds( 

3691 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"] 

3692 ) 

3693 self.assertEqual( 

3694 set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])), 

3695 {ref2}, 

3696 ) 

3697 
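
# A minimal pure-Python model of the find-first semantics checked above:
# for each data ID, findFirst=True returns the dataset from the earliest
# collection in the search order that contains one, so reversing the
# collection order flips which ref wins.  ``find_first`` is made up for
# illustration.

def find_first(collections, datasets_by_collection, data_id):
    """``datasets_by_collection`` maps collection name -> {data_id: ref}."""
    for collection in collections:
        ref = datasets_by_collection.get(collection, {}).get(data_id)
        if ref is not None:
            return ref
    return None

by_collection = {"run1": {"d": "ref1"}, "run2": {"d": "ref2"}}
assert find_first(["run1"], by_collection, "d") == "ref1"
assert find_first(["run2", "run1"], by_collection, "d") == "ref2"  # order matters
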

3698 def test_query_empty_collections(self) -> None: 

3699 """Test for registry query methods with empty collections. The methods 

3700 should return an empty result set (or None when applicable) and provide 

3701 "doomed" diagnostics. 

3702 """ 

3703 registry = self.makeRegistry() 

3704 self.loadData(registry, "base.yaml") 

3705 self.loadData(registry, "datasets.yaml") 

3706 

3707 # Tests for registry.findDataset() 

3708 with self.assertRaises(NoDefaultCollectionError): 

3709 registry.findDataset("bias", instrument="Cam1", detector=1) 

3710 self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...)) 

3711 self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[])) 

3712 

3713 # Tests for registry.queryDatasets() 

3714 with self.assertRaises(NoDefaultCollectionError): 

3715 registry.queryDatasets("bias") 

3716 self.assertTrue(list(registry.queryDatasets("bias", collections=...))) 

3717 

3718 result = registry.queryDatasets("bias", collections=[]) 

3719 self.assertEqual(len(list(result)), 0) 

3720 messages = list(result.explain_no_results()) 

3721 self.assertTrue(messages) 

3722 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3723 

3724 # Tests for registry.queryDataIds() 

3725 with self.assertRaises(NoDefaultCollectionError): 

3726 registry.queryDataIds("detector", datasets="bias") 

3727 self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...))) 

3728 

3729 result = registry.queryDataIds("detector", datasets="bias", collections=[]) 

3730 self.assertEqual(len(list(result)), 0) 

3731 messages = list(result.explain_no_results()) 

3732 self.assertTrue(messages) 

3733 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3734 

3735 # Tests for registry.queryDimensionRecords() 

3736 with self.assertRaises(NoDefaultCollectionError): 

3737 registry.queryDimensionRecords("detector", datasets="bias") 

3738 self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...))) 

3739 

3740 result = registry.queryDimensionRecords("detector", datasets="bias", collections=[]) 

3741 self.assertEqual(len(list(result)), 0) 

3742 messages = list(result.explain_no_results()) 

3743 self.assertTrue(messages) 

3744 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3745 

3746 def test_dataset_followup_spatial_joins(self) -> None: 

3747 """Test queryDataIds(...).findRelatedDatasets(...) where a spatial join 

3748 is involved. 

3749 """ 

3750 registry = self.makeRegistry() 

3751 self.loadData(registry, "base.yaml") 

3752 self.loadData(registry, "spatial.yaml") 

3753 pvi_dataset_type = DatasetType( 

3754 "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions 

3755 ) 

3756 registry.registerDatasetType(pvi_dataset_type) 

3757 collection = "datasets" 

3758 registry.registerRun(collection) 

3759 (pvi1,) = registry.insertDatasets( 

3760 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 1}], run=collection 

3761 ) 

3762 (pvi2,) = registry.insertDatasets( 

3763 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 2}], run=collection 

3764 ) 

3765 (pvi3,) = registry.insertDatasets( 

3766 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 3}], run=collection 

3767 ) 

3768 self.assertEqual( 

3769 set( 

3770 registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0) 

3771 .expanded() 

3772 .findRelatedDatasets("pvi", [collection]) 

3773 ), 

3774 { 

3775 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi1), 

3776 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi2), 

3777 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=1), pvi2), 

3778 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi1), 

3779 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi2), 

3780 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi3), 

3781 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=3), pvi2), 

3782 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=4), pvi3), 

3783 }, 

3784 )