
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

from ... import ddl

__all__ = ["RegistryTests"]

import datetime
import itertools
import os
import re
import time
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Callable, Iterator
from concurrent.futures import ThreadPoolExecutor
from datetime import timedelta
from threading import Barrier

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ..._dataset_association import DatasetAssociation
from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef
from ..._dataset_type import DatasetType
from ..._exceptions import CollectionTypeError, MissingCollectionError, MissingDatasetTypeError
from ..._exceptions_legacy import DatasetTypeError
from ..._storage_class import StorageClass
from ..._timespan import Timespan
from ...dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension
from .._collection_summary import CollectionSummary
from .._collection_type import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeExpressionError,
    InconsistentDataIdError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from .._registry import Registry
from ..interfaces import ButlerAttributeExistsError
from ..sql_registry import SqlRegistry

class RegistryTests(ABC):
    """Generic tests for the `SqlRegistry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: str | None = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: str | dict[str, str] | None = None
    """Name or configuration dictionary of the datasets manager class. If a
    subclass provides a value for this member, it overrides the name
    specified in the default configuration (`str` or `dict`).
    """

    supportsCollectionRegex: bool = True
    """True if the registry class being tested supports regex searches for
    collections."""

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
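        # The tuple keys below use Config's hierarchical indexing:
        # config["managers", "collections"] addresses the nested
        # "collections" entry under "managers".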

        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry | None:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with`
            is not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()
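
    # A minimal sketch of how a concrete subclass might implement the
    # abstract hooks above.  This is illustrative only (not part of the
    # original tests): the class name, the ``self.root`` attribute, and the
    # registry-construction step are assumptions, and real subclasses may
    # build their registries differently.
    #
    #     class SqliteRegistryTests(RegistryTests, unittest.TestCase):
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data")
    #
    #         def makeRegistry(self, share_repo_with=None):
    #             config = self.makeRegistryConfig()
    #             config["db"] = f"sqlite:///{self.root}/gen3.sqlite3"
    #             ...  # construct and return a Registry from ``config``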

    def loadData(self, registry: SqlRegistry, filename: str) -> None:
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to load into.
        filename : `str`
            The name of the file to load.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should
            be equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
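        # Iteration, count(), and any() are all exercised above because a
        # lazy results object typically answers each through a different
        # query path (full fetch, COUNT, LIMIT-1); that rationale is an
        # assumption about the query system, not something this helper
        # asserts directly.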

    def testOpaque(self):
        """Tests for `SqlRegistry.registerOpaqueTable`,
        `SqlRegistry.insertOpaqueData`, `SqlRegistry.fetchOpaqueData`, and
        `SqlRegistry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test very long IN clause which exceeds sqlite limit on number of
        # parameters. SQLite says the limit is 32k but it looks like it is
        # much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than 1k batch size, first with
        # duplicates, second has matching elements in different batches (after
        # sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `SqlRegistry.registerDatasetType` and
        `SqlRegistry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        differentDimensions = registry.dimensions.conform(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `SqlRegistry.insertDimensionData`,
        `SqlRegistry.syncDimensionData`, and `SqlRegistry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", dimensions=dimension.minimal_group)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, dimensions=dimension.minimal_group)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i", dimensions=dimension2.minimal_group
            )
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("day_obs", {"instrument": "DummyCam", "id": 20250101}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            (
                "visit",
                {
                    "instrument": "DummyCam",
                    "id": 42,
                    "name": "fortytwo",
                    "physical_filter": "d-r",
                    "day_obs": 20250101,
                },
            ),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `SqlRegistry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "day_obs",
            {"instrument": "Cam1", "id": 20250101},
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group1"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 1,
                "obs_id": "one",
                "physical_filter": "Cam1-G",
                "group": "group1",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group2"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 2,
                "obs_id": "two",
                "physical_filter": "Cam1-G",
                "group": "group2",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "day_obs": 20250101},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `SqlRegistry.insertDatasets`,
        `SqlRegistry.getDataset`, and `SqlRegistry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `SqlRegistry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, in which two have the right
        # dataset type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that SqlRegistry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that SqlRegistry.removeDatasetType raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(DatasetTypeError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `SqlRegistry._importDatasets` with UUID dataset ID."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing same DatasetRef with different dataset ID is an error
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs, they can be re-imported multiple times.
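        # (Assumed mechanics, consistent with the assertions below:
        # DATAID_TYPE derives a version-5 UUID from the dataset type and
        # data ID only, so the same dataset imported into a different run
        # collides; DATAID_TYPE_RUN also folds the run name into the hash,
        # so a new run yields a new, equally reproducible UUID.)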

        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make dataset ref with reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
                self.assertEqual(ref1.id, ref.id)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        Components can no longer be found by registry. This test checks
        that this now fails.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        with self.assertRaises(DatasetTypeError):
            registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})

        if self.supportsCollectionRegex:
            # Query for collections matching a regex.
            self.assertCountEqual(
                list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
                ["imported_r", "imported_g"],
            )
            # Query for collections matching a regex or an explicit str.
            self.assertCountEqual(
                list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
                ["imported_r", "imported_g", "chain1"],
            )
        # Same queries as the regex ones above, but using globs instead of
        # regex.
        self.assertCountEqual(
            list(registry.queryCollections("imported_*", flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a glob or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections(["imported_*", "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )

        # Search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2; it should be found both there
        # and via chain2, whose first child is run2.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that `SqlRegistry.setCollectionChain` obeys its 'flatten'
        option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testCollectionChainPrependConcurrency(self):
        """Verify that locking via database row locks is working as
        expected.
        """

        def blocked_thread_func(registry: SqlRegistry):
            # This call will become blocked after it has decided on positions
            # for the new children in the collection chain, but before
            # inserting them.
            registry._managers.collections.prepend_collection_chain("chain", ["a"])

        def unblocked_thread_func(registry: SqlRegistry):
            registry._managers.collections.prepend_collection_chain("chain", ["b"])

        registry = self._do_collection_concurrency_test(blocked_thread_func, unblocked_thread_func)

        # blocked_thread_func should have finished first, inserting "a".
        # unblocked_thread_func should have finished second, prepending "b".
        self.assertEqual(("b", "a"), registry.getCollectionChain("chain"))

    def testCollectionChainReplaceConcurrency(self):
        """Verify that locking via database row locks is working as
        expected.
        """

        def blocked_thread_func(registry: SqlRegistry):
            # This call will become blocked after deleting children, but before
            # inserting new ones.
            registry.setCollectionChain("chain", ["a"])

        def unblocked_thread_func(registry: SqlRegistry):
            registry.setCollectionChain("chain", ["b"])

        registry = self._do_collection_concurrency_test(blocked_thread_func, unblocked_thread_func)

        # blocked_thread_func should have finished first.
        # unblocked_thread_func should have finished second, overwriting the
        # chain with "b".
        self.assertEqual(("b",), registry.getCollectionChain("chain"))

    def _do_collection_concurrency_test(
        self,
        blocked_thread_func: Callable[[SqlRegistry], None],
        unblocked_thread_func: Callable[[SqlRegistry], None],
    ) -> SqlRegistry:
        # This function:
        # 1. Sets up two registries pointing at the same database.
        # 2. Starts running 'blocked_thread_func' in a background thread,
        #    arranging for it to become blocked during a critical section in
        #    the collections manager.
        # 3. Waits for 'blocked_thread_func' to reach the critical section.
        # 4. Starts running 'unblocked_thread_func'.
        # 5. Allows both functions to run to completion.

        # Set up two registries pointing to the same DB
        registry1 = self.makeRegistry()
        assert isinstance(registry1, SqlRegistry)
        registry2 = self.makeRegistry(share_repo_with=registry1)
        if registry2 is None:
            # This will happen for in-memory SQL databases.
            raise unittest.SkipTest("Testing concurrency requires two connections to the same DB.")

        registry1.registerCollection("chain", CollectionType.CHAINED)
        for collection in ["a", "b"]:
            registry1.registerCollection(collection)

        # Arrange for registry1 to block during its critical section, allowing
        # us to detect this and control when it becomes unblocked.
        enter_barrier = Barrier(2, timeout=60)
        exit_barrier = Barrier(2, timeout=60)

        def wait_for_barrier():
            enter_barrier.wait()
            exit_barrier.wait()

        registry1._managers.collections._block_for_concurrency_test = wait_for_barrier
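
        # How the handshake above works (each Barrier has two parties: this
        # test thread and the worker running blocked_thread_func):
        # enter_barrier.wait() inside wait_for_barrier signals that the
        # worker has reached the critical section, and the worker then stays
        # parked on exit_barrier until this method calls exit_barrier.wait()
        # below.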

        with ThreadPoolExecutor(max_workers=1) as exec1:
            with ThreadPoolExecutor(max_workers=1) as exec2:
                future1 = exec1.submit(blocked_thread_func, registry1)
                enter_barrier.wait()

                # At this point registry 1 has entered the critical section and
                # is waiting for us to release it. Start the other thread.
                future2 = exec2.submit(unblocked_thread_func, registry2)
                # thread2 should block inside a database call, but we have no
                # way to detect when it is in this state.
                time.sleep(0.200)

                # Let the threads run to completion.
                exit_barrier.wait()
                future1.result()
                future2.result()

        return registry1

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
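        # The inner block below uses transaction(savepoint=True); in standard
        # SQL terms this corresponds to a SAVEPOINT nested inside the outer
        # transaction, so rolling back the inner block undoes only its own
        # work while the outer transaction remains open and can still commit.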

        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, dimensions=dimension.minimal_group))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, dimensions=dimension.minimal_group)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap.
        """
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData("day_obs", dict(instrument="DummyCam", id=20250101))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", day_obs=20250101),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", day_obs=20250101),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", day_obs=20250101),
        )
        registry.insertDimensionData(
            "group",
            dict(instrument="DummyCam", name="ten"),
            dict(instrument="DummyCam", name="eleven"),
            dict(instrument="DummyCam", name="twelve"),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(
                instrument="DummyCam",
                id=100,
                obs_id="100",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=101,
                obs_id="101",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=110,
                obs_id="110",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=111,
                obs_id="111",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=200,
                obs_id="200",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=201,
                obs_id="201",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit=10),
            dict(instrument="DummyCam", exposure=101, visit=10),
            dict(instrument="DummyCam", exposure=110, visit=11),
            dict(instrument="DummyCam", exposure=111, visit=11),
            dict(instrument="DummyCam", exposure=200, visit=20),
            dict(instrument="DummyCam", exposure=201, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.conform(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.conform(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = registry.dimensions.conform(
            rawType.dimensions.required.names | calexpType.dimensions.required.names
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, this is not in the dimensions, but it
        # is a part of the full expression so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

1216 def testSkyMapDimensions(self): 

1217 """Tests involving only skymap dimensions, no joins to instrument.""" 

1218 registry = self.makeRegistry() 

1219 

1220 # need a bunch of dimensions and datasets for test, we want 

1221 # "band" in the test so also have to add physical_filter 

1222 # dimensions 

1223 registry.insertDimensionData("instrument", dict(instrument="DummyCam")) 

1224 registry.insertDimensionData( 

1225 "physical_filter", 

1226 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1227 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1228 ) 

1229 registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!")) 

1230 for tract in range(10): 

1231 registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract)) 

1232 registry.insertDimensionData( 

1233 "patch", 

1234 *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)], 

1235 ) 

1236 

1237 # dataset types 

1238 run = "tésτ" 

1239 registry.registerRun(run) 

1240 storageClass = StorageClass("testDataset") 

1241 registry.storageClasses.registerStorageClass(storageClass) 

1242 calexpType = DatasetType( 

1243 name="deepCoadd_calexp", 

1244 dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")), 

1245 storageClass=storageClass, 

1246 ) 

1247 registry.registerDatasetType(calexpType) 

1248 mergeType = DatasetType( 

1249 name="deepCoadd_mergeDet", 

1250 dimensions=registry.dimensions.conform(("skymap", "tract", "patch")), 

1251 storageClass=storageClass, 

1252 ) 

1253 registry.registerDatasetType(mergeType) 

1254 measType = DatasetType( 

1255 name="deepCoadd_meas", 

1256 dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")), 

1257 storageClass=storageClass, 

1258 ) 

1259 registry.registerDatasetType(measType) 

1260 

1261 dimensions = registry.dimensions.conform( 

1262 calexpType.dimensions.required.names 

1263 | mergeType.dimensions.required.names 

1264 | measType.dimensions.required.names 

1265 ) 

1266 

1267 # add pre-existing datasets 

1268 for tract in (1, 3, 5): 

1269 for patch in (2, 4, 6, 7): 

1270 dataId = dict(skymap="DummyMap", tract=tract, patch=patch) 

1271 registry.insertDatasets(mergeType, dataIds=[dataId], run=run) 

1272 for aFilter in ("i", "r"): 

1273 dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter) 

1274 registry.insertDatasets(calexpType, dataIds=[dataId], run=run) 

1275 

1276 # with empty expression 

1277 rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet() 

1278 self.assertEqual(len(rows), 3 * 4 * 2) # 4 tracts x 4 patches x 2 filters 

1279 for dataId in rows: 

1280 self.assertCountEqual(dataId.dimensions.required, ("skymap", "tract", "patch", "band")) 

1281 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5)) 

1282 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7)) 

1283 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r")) 

1284 

1285 # limit to 2 tracts and 2 patches 

1286 rows = registry.queryDataIds( 

1287 dimensions, 

1288 datasets=[calexpType, mergeType], 

1289 collections=run, 

1290 where="tract IN (1, 5) AND patch IN (2, 7)", 

1291 skymap="DummyMap", 

1292 ).toSet() 

1293 self.assertEqual(len(rows), 2 * 2 * 2) # 2 tracts x 2 patches x 2 filters 

1294 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5)) 

1295 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7)) 

1296 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r")) 

1297 

1298 # limit to single filter 

1299 rows = registry.queryDataIds( 

1300 dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'" 

1301 ).toSet() 

1302 self.assertEqual(len(rows), 3 * 4 * 1) # 3 tracts x 4 patches x 1 filter

1303 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5)) 

1304 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7)) 

1305 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",)) 

1306 

1307 # Specifying a non-existent skymap raises an exception.

1308 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

1309 rows = registry.queryDataIds( 

1310 dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'" 

1311 ).toSet() 

1312 
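# The failure mode exercised at the end of the test above, in isolation:
# unknown values for a governor dimension raise ``DataIdValueError`` rather
# than yielding an empty result.  Illustrative sketch only; the function
# name is hypothetical and it assumes a registry populated as in
# testSkyMapDimensions.
def _example_bad_governor_value(registry: Registry) -> bool:
    """Sketch: return True if an unknown skymap value is rejected."""
    try:
        registry.queryDataIds(["tract"], where="skymap = 'Mars'").toSet()
    except DataIdValueError:
        # The bad value is caught when the query is built, not at iteration.
        return True
    return False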

1313 def testSpatialJoin(self): 

1314 """Test queries that involve spatial overlap joins.""" 

1315 registry = self.makeRegistry() 

1316 self.loadData(registry, "hsc-rc2-subset.yaml") 

1317 

1318 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of 

1319 # the TopologicalFamily they belong to. We'll relate all elements in 

1320 # each family to all of the elements in each other family. 

1321 families = defaultdict(set) 

1322 # Dictionary of {element.name: {dataId: region}}. 

1323 regions = {} 

1324 for element in registry.dimensions.database_elements: 

1325 if element.spatial is not None: 

1326 families[element.spatial.name].add(element) 

1327 regions[element.name] = { 

1328 record.dataId: record.region for record in registry.queryDimensionRecords(element) 

1329 } 

1330 

1331 # If this check fails, it's not necessarily a problem - it may just be 

1332 # a reasonable change to the default dimension definitions - but the 

1333 # test below depends on there being more than one family to do anything 

1334 # useful. 

1335 self.assertEqual(len(families), 2) 

1336 

1337 # Overlap DatabaseDimensionElements with each other. 

1338 for family1, family2 in itertools.combinations(families, 2): 

1339 for element1, element2 in itertools.product(families[family1], families[family2]): 

1340 dimensions = element1.minimal_group | element2.minimal_group 

1341 # Construct expected set of overlapping data IDs via a 

1342 # brute-force comparison of the regions we've already fetched. 

1343 expected = { 

1344 DataCoordinate.standardize( 

1345 {**dataId1.required, **dataId2.required}, dimensions=dimensions 

1346 ) 

1347 for (dataId1, region1), (dataId2, region2) in itertools.product( 

1348 regions[element1.name].items(), regions[element2.name].items() 

1349 ) 

1350 if not region1.isDisjointFrom(region2) 

1351 } 

1352 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1353 queried = set(registry.queryDataIds(dimensions)) 

1354 self.assertEqual(expected, queried) 

1355 

1356 # Overlap each DatabaseDimensionElement with the commonSkyPix system. 

1357 commonSkyPix = registry.dimensions.commonSkyPix 

1358 for elementName, these_regions in regions.items(): 

1359 dimensions = registry.dimensions[elementName].minimal_group | commonSkyPix.minimal_group 

1360 expected = set() 

1361 for dataId, region in these_regions.items(): 

1362 for begin, end in commonSkyPix.pixelization.envelope(region): 

1363 expected.update( 

1364 DataCoordinate.standardize( 

1365 {commonSkyPix.name: index, **dataId.required}, dimensions=dimensions 

1366 ) 

1367 for index in range(begin, end) 

1368 ) 

1369 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1370 queried = set(registry.queryDataIds(dimensions)) 

1371 self.assertEqual(expected, queried) 

1372 
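# The brute-force overlap computation in testSpatialJoin generalizes to any
# two mappings of data ID -> region.  A minimal sketch, assuming the same
# ``regions``-style dictionaries built above; the function name is
# hypothetical.
def _example_brute_force_overlaps(regions1: dict, regions2: dict) -> set:
    """Sketch: pairs of keys whose sphgeom regions overlap."""
    return {
        (key1, key2)
        for (key1, region1), (key2, region2) in itertools.product(
            regions1.items(), regions2.items()
        )
        if not region1.isDisjointFrom(region2)
    }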

1373 def testAbstractQuery(self): 

1374 """Test that we can run a query that just lists the known 

1375 bands. This is tricky because band is 

1376 backed by a query against physical_filter. 

1377 """ 

1378 registry = self.makeRegistry() 

1379 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1380 registry.insertDimensionData( 

1381 "physical_filter", 

1382 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1383 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1384 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1385 ) 

1386 rows = registry.queryDataIds(["band"]).toSet() 

1387 self.assertCountEqual( 

1388 rows, 

1389 [ 

1390 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1391 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1392 ], 

1393 ) 

1394 
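# ``DataCoordinate.standardize`` is how the expected values above are built;
# the sketch below isolates that one call.  Illustrative only; the band
# names are arbitrary examples and the function name is hypothetical.
def _example_standardize_bands(registry: Registry) -> list[DataCoordinate]:
    """Sketch: construct band-only data IDs for comparison with query
    results.
    """
    return [
        DataCoordinate.standardize(band=band, universe=registry.dimensions)
        for band in ("i", "r")
    ]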

1395 def testAttributeManager(self): 

1396 """Test basic functionality of attribute manager.""" 

1397 # number of attributes with schema versions in a fresh database, 

1398 # 6 managers with 2 records per manager, plus config for dimensions 

1399 VERSION_COUNT = 6 * 2 + 1 

1400 

1401 registry = self.makeRegistry() 

1402 attributes = registry._managers.attributes 

1403 

1404 # check what get() returns for a non-existing key

1405 self.assertIsNone(attributes.get("attr")) 

1406 self.assertEqual(attributes.get("attr", ""), "") 

1407 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1408 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1409 

1410 # cannot store empty key or value 

1411 with self.assertRaises(ValueError): 

1412 attributes.set("", "value") 

1413 with self.assertRaises(ValueError): 

1414 attributes.set("attr", "") 

1415 

1416 # set value of non-existing key 

1417 attributes.set("attr", "value") 

1418 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1419 self.assertEqual(attributes.get("attr"), "value") 

1420 

1421 # update value of an existing key (plain set raises; force=True overwrites)

1422 with self.assertRaises(ButlerAttributeExistsError): 

1423 attributes.set("attr", "value2") 

1424 

1425 attributes.set("attr", "value2", force=True) 

1426 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1427 self.assertEqual(attributes.get("attr"), "value2") 

1428 

1429 # delete existing key 

1430 self.assertTrue(attributes.delete("attr")) 

1431 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1432 

1433 # delete non-existing key 

1434 self.assertFalse(attributes.delete("non-attr")) 

1435 

1436 # store bunch of keys and get the list back 

1437 data = [ 

1438 ("version.core", "1.2.3"), 

1439 ("version.dimensions", "3.2.1"), 

1440 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1441 ] 

1442 for key, value in data: 

1443 attributes.set(key, value) 

1444 items = dict(attributes.items()) 

1445 for key, value in data: 

1446 self.assertEqual(items[key], value) 

1447 
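# The attribute-manager calls above follow a create/read/force-update/delete
# lifecycle; the sketch below condenses it.  ``attributes`` is assumed to be
# the same ``registry._managers.attributes`` object used above (a private
# attribute, so this is illustrative only; the function name is
# hypothetical).
def _example_attribute_lifecycle(attributes) -> None:
    """Sketch: the full lifecycle of one butler attribute."""
    attributes.set("example.key", "v1")  # create
    assert attributes.get("example.key") == "v1"
    attributes.set("example.key", "v2", force=True)  # overwrite needs force
    assert attributes.get("example.key") == "v2"
    assert attributes.delete("example.key")  # True when the key existed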

1448 def testQueryDatasetsDeduplication(self): 

1449 """Test that the findFirst option to queryDatasets selects datasets 

1450 from collections in the order given.

1451 """ 

1452 registry = self.makeRegistry() 

1453 self.loadData(registry, "base.yaml") 

1454 self.loadData(registry, "datasets.yaml") 

1455 self.assertCountEqual( 

1456 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1457 [ 

1458 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1459 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1460 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1461 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1462 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1463 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1464 ], 

1465 ) 

1466 self.assertCountEqual( 

1467 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1468 [ 

1469 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1470 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1471 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1472 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1473 ], 

1474 ) 

1475 self.assertCountEqual( 

1476 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1477 [ 

1478 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1479 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1480 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1481 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1482 ], 

1483 ) 

1484 
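# ``findFirst=True`` resolves each data ID against the collection list in
# order, which the sketch below restates compactly.  It assumes the same
# base.yaml/datasets.yaml test data loaded above; the function name is
# hypothetical.
def _example_find_first(registry: Registry) -> set[DatasetRef]:
    """Sketch: deduplicated biases, preferring imported_g over imported_r."""
    return set(
        registry.queryDatasets(
            "bias",
            collections=["imported_g", "imported_r"],
            findFirst=True,  # first listed collection wins per data ID
        )
    )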

1485 def testQueryResults(self): 

1486 """Test querying for data IDs and then manipulating the QueryResults 

1487 object returned to perform other queries. 

1488 """ 

1489 registry = self.makeRegistry() 

1490 self.loadData(registry, "base.yaml") 

1491 self.loadData(registry, "datasets.yaml") 

1492 bias = registry.getDatasetType("bias") 

1493 flat = registry.getDatasetType("flat") 

1494 # Obtain expected results from methods other than those we're testing 

1495 # here. That includes: 

1496 # - the dimensions of the data IDs we want to query: 

1497 expected_dimensions = registry.dimensions.conform(["detector", "physical_filter"]) 

1498 # - the dimensions of some other data IDs we'll extract from that: 

1499 expected_subset_dimensions = registry.dimensions.conform(["detector"]) 

1500 # - the data IDs we expect to obtain from the first queries: 

1501 expectedDataIds = DataCoordinateSet( 

1502 { 

1503 DataCoordinate.standardize( 

1504 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1505 ) 

1506 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1507 }, 

1508 dimensions=expected_dimensions, 

1509 hasFull=False, 

1510 hasRecords=False, 

1511 ) 

1512 # - the flat datasets we expect to find from those data IDs, in just 

1513 # one collection (so deduplication is irrelevant): 

1514 expectedFlats = [ 

1515 registry.findDataset( 

1516 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1517 ), 

1518 registry.findDataset( 

1519 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1520 ), 

1521 registry.findDataset( 

1522 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1523 ), 

1524 ] 

1525 # - the data IDs we expect to extract from that: 

1526 expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions) 

1527 # - the bias datasets we expect to find from those data IDs, after we 

1528 # subset out the physical_filter dimension, first with duplicates:

1529 expectedAllBiases = [ 

1530 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1531 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1532 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1533 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1534 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1535 ] 

1536 # - ...and without duplicates: 

1537 expectedDeduplicatedBiases = [ 

1538 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1539 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1540 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1541 ] 

1542 # Test against those expected results, using a "lazy" query for the 

1543 # data IDs (which re-executes that query each time we use it to do 

1544 # something new). 

1545 dataIds = registry.queryDataIds( 

1546 ["detector", "physical_filter"], 

1547 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1548 instrument="Cam1", 

1549 ) 

1550 self.assertEqual(dataIds.dimensions, expected_dimensions) 

1551 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1552 self.assertCountEqual( 

1553 list( 

1554 dataIds.findDatasets( 

1555 flat, 

1556 collections=["imported_r"], 

1557 ) 

1558 ), 

1559 expectedFlats, 

1560 ) 

1561 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True) 

1562 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1563 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1564 self.assertCountEqual( 

1565 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1566 expectedAllBiases, 

1567 ) 

1568 self.assertCountEqual( 

1569 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1570 expectedDeduplicatedBiases, 

1571 ) 

1572 

1573 # Searching for a dataset with dimensions we had projected away 

1574 # restores those dimensions. 

1575 self.assertCountEqual( 

1576 list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)), 

1577 expectedFlats, 

1578 ) 

1579 

1580 # Use a named dataset type that does not exist and a dataset type 

1581 # object that does not exist. 

1582 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure") 

1583 

1584 # Test both string name and dataset type object. 

1585 test_type: str | DatasetType 

1586 for test_type, test_type_name in ( 

1587 (unknown_type, unknown_type.name), 

1588 (unknown_type.name, unknown_type.name), 

1589 ): 

1590 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name): 

1591 list( 

1592 subsetDataIds.findDatasets( 

1593 test_type, collections=["imported_r", "imported_g"], findFirst=True 

1594 ) 

1595 ) 

1596 

1597 # Materialize the bias dataset queries (only) by putting the results 

1598 # into temporary tables, then repeat those tests. 

1599 with subsetDataIds.findDatasets( 

1600 bias, collections=["imported_r", "imported_g"], findFirst=False 

1601 ).materialize() as biases: 

1602 self.assertCountEqual(list(biases), expectedAllBiases) 

1603 with subsetDataIds.findDatasets( 

1604 bias, collections=["imported_r", "imported_g"], findFirst=True 

1605 ).materialize() as biases: 

1606 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1607 # Materialize the data ID subset query, but not the dataset queries. 

1608 with subsetDataIds.materialize() as subsetDataIds: 

1609 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1610 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1611 self.assertCountEqual( 

1612 list( 

1613 subsetDataIds.findDatasets( 

1614 bias, collections=["imported_r", "imported_g"], findFirst=False 

1615 ) 

1616 ), 

1617 expectedAllBiases, 

1618 ) 

1619 self.assertCountEqual( 

1620 list( 

1621 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1622 ), 

1623 expectedDeduplicatedBiases, 

1624 ) 

1625 # Materialize the dataset queries, too. 

1626 with subsetDataIds.findDatasets( 

1627 bias, collections=["imported_r", "imported_g"], findFirst=False 

1628 ).materialize() as biases: 

1629 self.assertCountEqual(list(biases), expectedAllBiases) 

1630 with subsetDataIds.findDatasets( 

1631 bias, collections=["imported_r", "imported_g"], findFirst=True 

1632 ).materialize() as biases: 

1633 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1634 # Materialize the original query, but none of the follow-up queries. 

1635 with dataIds.materialize() as dataIds: 

1636 self.assertEqual(dataIds.dimensions, expected_dimensions) 

1637 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1638 self.assertCountEqual( 

1639 list( 

1640 dataIds.findDatasets( 

1641 flat, 

1642 collections=["imported_r"], 

1643 ) 

1644 ), 

1645 expectedFlats, 

1646 ) 

1647 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True) 

1648 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1649 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1650 self.assertCountEqual( 

1651 list( 

1652 subsetDataIds.findDatasets( 

1653 bias, collections=["imported_r", "imported_g"], findFirst=False 

1654 ) 

1655 ), 

1656 expectedAllBiases, 

1657 ) 

1658 self.assertCountEqual( 

1659 list( 

1660 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1661 ), 

1662 expectedDeduplicatedBiases, 

1663 ) 

1664 # Materialize just the bias dataset queries. 

1665 with subsetDataIds.findDatasets( 

1666 bias, collections=["imported_r", "imported_g"], findFirst=False 

1667 ).materialize() as biases: 

1668 self.assertCountEqual(list(biases), expectedAllBiases) 

1669 with subsetDataIds.findDatasets( 

1670 bias, collections=["imported_r", "imported_g"], findFirst=True 

1671 ).materialize() as biases: 

1672 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1673 # Materialize the subset data ID query, but not the dataset 

1674 # queries. 

1675 with subsetDataIds.materialize() as subsetDataIds: 

1676 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1677 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1678 self.assertCountEqual( 

1679 list( 

1680 subsetDataIds.findDatasets( 

1681 bias, collections=["imported_r", "imported_g"], findFirst=False 

1682 ) 

1683 ), 

1684 expectedAllBiases, 

1685 ) 

1686 self.assertCountEqual( 

1687 list( 

1688 subsetDataIds.findDatasets( 

1689 bias, collections=["imported_r", "imported_g"], findFirst=True 

1690 ) 

1691 ), 

1692 expectedDeduplicatedBiases, 

1693 ) 

1694 # Materialize the bias dataset queries, too, so now we're 

1695 # materializing every single step. 

1696 with subsetDataIds.findDatasets( 

1697 bias, collections=["imported_r", "imported_g"], findFirst=False 

1698 ).materialize() as biases: 

1699 self.assertCountEqual(list(biases), expectedAllBiases) 

1700 with subsetDataIds.findDatasets( 

1701 bias, collections=["imported_r", "imported_g"], findFirst=True 

1702 ).materialize() as biases: 

1703 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1704 
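# The nested ``materialize()`` blocks above all follow one pattern; the
# sketch below shows it once.  Illustrative only, assuming the Cam1 test
# data loaded above; the function name is hypothetical.
def _example_materialize(registry: Registry) -> int:
    """Sketch: materialize a data ID query into a temporary table, then run
    a follow-up dataset search against it.
    """
    data_ids = registry.queryDataIds(["detector"], instrument="Cam1")
    with data_ids.materialize() as materialized:
        # Inside the context, follow-up queries reuse the temporary table
        # instead of re-executing the original query.
        refs = materialized.findDatasets(
            "bias", collections=["imported_g"], findFirst=True
        )
        return len(list(refs))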

1705 def testStorageClassPropagation(self): 

1706 """Test that queries for datasets respect the storage class passed in 

1707 as part of a full dataset type. 

1708 """ 

1709 registry = self.makeRegistry() 

1710 self.loadData(registry, "base.yaml") 

1711 dataset_type_in_registry = DatasetType( 

1712 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions 

1713 ) 

1714 registry.registerDatasetType(dataset_type_in_registry) 

1715 run = "run1" 

1716 registry.registerRun(run) 

1717 (inserted_ref,) = registry.insertDatasets( 

1718 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run 

1719 ) 

1720 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry) 

1721 query_dataset_type = DatasetType( 

1722 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions 

1723 ) 

1724 self.assertNotEqual(dataset_type_in_registry, query_dataset_type) 

1725 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run]) 

1726 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore 

1727 (query_datasets_ref,) = query_datasets_result 

1728 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type) 

1729 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets( 

1730 query_dataset_type, collections=[run] 

1731 ) 

1732 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type) 

1733 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result 

1734 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type) 

1735 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type) 

1736 self.assertEqual(list(query_dataset_types_result), [query_dataset_type]) 

1737 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run]) 

1738 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type) 

1739 
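# Storage-class propagation reduces to: the DatasetType passed to the query
# wins over the one stored in the registry.  A minimal sketch, assuming a
# registered dataset type named "tbl" and a run named "run1" as above; the
# function name is hypothetical.
def _example_storage_class_override(registry: Registry) -> DatasetRef | None:
    """Sketch: look up a dataset while overriding its storage class."""
    override = DatasetType(
        "tbl",
        dimensions=["instrument"],
        storageClass="StructuredDataDict",  # differs from the registered one
        universe=registry.dimensions,
    )
    return registry.findDataset(override, instrument="Cam1", collections=["run1"])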

1740 def testEmptyDimensionsQueries(self): 

1741 """Test Query and QueryResults objects in the case where there are no 

1742 dimensions. 

1743 """ 

1744 # Set up test data: one dataset type, two runs, one dataset in each. 

1745 registry = self.makeRegistry() 

1746 self.loadData(registry, "base.yaml") 

1747 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1748 registry.registerDatasetType(schema) 

1749 dataId = DataCoordinate.make_empty(registry.dimensions) 

1750 run1 = "run1" 

1751 run2 = "run2" 

1752 registry.registerRun(run1) 

1753 registry.registerRun(run2) 

1754 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1755 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1756 # Query directly for both of the datasets, and each one, one at a time. 

1757 self.checkQueryResults( 

1758 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1759 ) 

1760 self.checkQueryResults( 

1761 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1762 [dataset1], 

1763 ) 

1764 self.checkQueryResults( 

1765 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1766 [dataset2], 

1767 ) 

1768 # Query for data IDs with no dimensions. 

1769 dataIds = registry.queryDataIds([]) 

1770 self.checkQueryResults(dataIds, [dataId]) 

1771 # Use queried data IDs to find the datasets. 

1772 self.checkQueryResults( 

1773 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1774 [dataset1, dataset2], 

1775 ) 

1776 self.checkQueryResults( 

1777 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1778 [dataset1], 

1779 ) 

1780 self.checkQueryResults( 

1781 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1782 [dataset2], 

1783 ) 

1784 # Now materialize the data ID query results and repeat those tests. 

1785 with dataIds.materialize() as dataIds: 

1786 self.checkQueryResults(dataIds, [dataId]) 

1787 self.checkQueryResults( 

1788 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1789 [dataset1], 

1790 ) 

1791 self.checkQueryResults( 

1792 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1793 [dataset2], 

1794 ) 

1795 # Query for non-empty data IDs, then subset that to get the empty one. 

1796 # Repeat the above tests starting from that. 

1797 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1798 self.checkQueryResults(dataIds, [dataId]) 

1799 self.checkQueryResults( 

1800 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1801 [dataset1, dataset2], 

1802 ) 

1803 self.checkQueryResults( 

1804 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1805 [dataset1], 

1806 ) 

1807 self.checkQueryResults( 

1808 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1809 [dataset2], 

1810 ) 

1811 with dataIds.materialize() as dataIds: 

1812 self.checkQueryResults(dataIds, [dataId]) 

1813 self.checkQueryResults( 

1814 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1815 [dataset1, dataset2], 

1816 ) 

1817 self.checkQueryResults( 

1818 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1819 [dataset1], 

1820 ) 

1821 self.checkQueryResults( 

1822 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1823 [dataset2], 

1824 ) 

1825 # Query for non-empty data IDs, then materialize, then subset to get 

1826 # the empty one. Repeat again. 

1827 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1828 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1829 self.checkQueryResults(dataIds, [dataId]) 

1830 self.checkQueryResults( 

1831 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1832 [dataset1, dataset2], 

1833 ) 

1834 self.checkQueryResults( 

1835 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1836 [dataset1], 

1837 ) 

1838 self.checkQueryResults( 

1839 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1840 [dataset2], 

1841 ) 

1842 with dataIds.materialize() as dataIds: 

1843 self.checkQueryResults(dataIds, [dataId]) 

1844 self.checkQueryResults( 

1845 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1846 [dataset1, dataset2], 

1847 ) 

1848 self.checkQueryResults( 

1849 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1850 [dataset1], 

1851 ) 

1852 self.checkQueryResults( 

1853 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1854 [dataset2], 

1855 ) 

1856 # Repeat the materialization tests with a dimension element that isn't 

1857 # cached, so there's no way we can know when building the query whether

1858 # there are any rows or not (there aren't).

1859 dataIds = registry.queryDataIds(["exposure"]).subset(registry.dimensions.empty, unique=True) 

1860 with dataIds.materialize() as dataIds: 

1861 self.checkQueryResults(dataIds, []) 

1862 self.checkQueryResults( 

1863 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), [] 

1864 ) 

1865 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), []) 

1866 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), []) 

1867 # Query for non-empty data IDs with a constraint on an empty-data-ID 

1868 # dataset that exists. 

1869 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...) 

1870 self.checkQueryResults( 

1871 dataIds.subset(unique=True), 

1872 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)], 

1873 ) 

1874 # Again query for non-empty data IDs with a constraint on empty-data-ID 

1875 # datasets, but when the datasets don't exist. We delete the existing 

1876 # dataset and query just that collection rather than creating a new 

1877 # empty collection because this is a bit less likely for our build-time 

1878 # logic to shortcut-out (via the collection summaries), and such a 

1879 # shortcut would make this test a bit more trivial than we'd like. 

1880 registry.removeDatasets([dataset2]) 

1881 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2) 

1882 self.checkQueryResults(dataIds, []) 

1883 
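# Queries with no dimensions hinge on the singleton empty data coordinate;
# the sketch below shows how it is constructed.  Illustrative only; the
# function name is hypothetical.
def _example_empty_data_id(registry: Registry) -> DataCoordinate:
    """Sketch: the empty data ID that labels datasets with no dimensions."""
    empty = DataCoordinate.make_empty(registry.dimensions)
    # Every dataset whose type has empty dimensions (like "schema" above)
    # shares this single data ID.
    return empty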

1884 def testDimensionDataModifications(self): 

1885 """Test that modifying dimension records via: 

1886 syncDimensionData(..., update=True) and 

1887 insertDimensionData(..., replace=True) works as expected, even in the 

1888 presence of datasets using those dimensions and spatial overlap 

1889 relationships. 

1890 """ 

1891 

1892 def _unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1893 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1894 for begin, end in ranges: 

1895 yield from range(begin, end) 

1896 

1897 def _range_set_hull( 

1898 ranges: lsst.sphgeom.RangeSet, 

1899 pixelization: lsst.sphgeom.HtmPixelization, 

1900 ) -> lsst.sphgeom.ConvexPolygon: 

1901 """Create a ConvexPolygon hull of the region defined by a set of 

1902 HTM pixelization index ranges. 

1903 """ 

1904 points = [] 

1905 for index in _unpack_range_set(ranges): 

1906 points.extend(pixelization.triangle(index).getVertices()) 

1907 return lsst.sphgeom.ConvexPolygon(points) 

1908 

1909 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1910 # and four child regions (the trixels within the parent at the next 

1911 # level). We'll use the parent as a tract/visit region and the children

1912 # as its patch/visit_detector regions. 

1913 registry = self.makeRegistry() 

1914 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1915 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1916 index = 12288 

1917 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 

1918 assert htm6.universe().contains(child_ranges_small) 

1919 child_regions_small = [htm6.triangle(i) for i in _unpack_range_set(child_ranges_small)] 

1920 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1921 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1922 ) 

1923 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1924 # Make a larger version of each child region, defined to be the set of 

1925 # htm6 trixels that overlap the original's bounding circle. Make a new 

1926 # parent that's the convex hull of the new children. 

1927 child_regions_large = [ 

1928 _range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1929 ] 

1930 assert all( 

1931 large.contains(small) 

1932 for large, small in zip(child_regions_large, child_regions_small, strict=True) 

1933 ) 

1934 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1935 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1936 ) 

1937 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1938 assert parent_region_large.contains(parent_region_small) 

1939 assert not parent_region_small.contains(parent_region_large) 

1940 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1941 # Find some commonSkyPix indices that overlap the large regions but do

1942 # not overlap the small regions. We use commonSkyPix here to make sure the

1943 # real tests later involve what's in the database, not just post-query 

1944 # filtering of regions. 

1945 child_difference_indices = [] 

1946 for large, small in zip(child_regions_large, child_regions_small, strict=True): 

1947 difference = list(_unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1948 assert difference, "if this is empty, we can't test anything useful with these regions" 

1949 assert all( 

1950 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1951 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1952 for d in difference 

1953 ) 

1954 child_difference_indices.append(difference) 

1955 parent_difference_indices = list( 

1956 _unpack_range_set( 

1957 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1958 ) 

1959 ) 

1960 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1961 assert all( 

1962 ( 

1963 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1964 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1965 ) 

1966 for d in parent_difference_indices 

1967 ) 

1968 # Now that we've finally got those regions, we'll insert the large ones 

1969 # as tract/patch dimension records. 

1970 skymap_name = "testing_v1" 

1971 registry.insertDimensionData( 

1972 "skymap", 

1973 { 

1974 "name": skymap_name, 

1975 "hash": bytes([42]), 

1976 "tract_max": 1, 

1977 "patch_nx_max": 2, 

1978 "patch_ny_max": 2, 

1979 }, 

1980 ) 

1981 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

1982 registry.insertDimensionData( 

1983 "patch", 

1984 *[ 

1985 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1986 for n, c in enumerate(child_regions_large) 

1987 ], 

1988 ) 

1989 # Add a dataset that uses these dimensions to make sure that modifying

1990 # them doesn't disrupt foreign keys (need to make sure DB doesn't 

1991 # implement insert with replace=True as delete-then-insert). 

1992 dataset_type = DatasetType( 

1993 "coadd", 

1994 dimensions=["tract", "patch"], 

1995 universe=registry.dimensions, 

1996 storageClass="Exposure", 

1997 ) 

1998 registry.registerDatasetType(dataset_type) 

1999 registry.registerCollection("the_run", CollectionType.RUN) 

2000 registry.insertDatasets( 

2001 dataset_type, 

2002 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

2003 run="the_run", 

2004 ) 

2005 # Query for tracts and patches that overlap some "difference" commonSkyPix

2006 # pixels; there should be overlaps, because the database has 

2007 # the "large" suite of regions. 

2008 self.assertEqual( 

2009 {0}, 

2010 { 

2011 data_id["tract"] 

2012 for data_id in registry.queryDataIds( 

2013 ["tract"], 

2014 skymap=skymap_name, 

2015 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

2016 ) 

2017 }, 

2018 ) 

2019 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

2020 self.assertIn( 

2021 patch_id, 

2022 { 

2023 data_id["patch"] 

2024 for data_id in registry.queryDataIds( 

2025 ["patch"], 

2026 skymap=skymap_name, 

2027 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

2028 ) 

2029 }, 

2030 ) 

2031 # Use sync to update the tract region and insert to update the regions 

2032 # of the patches, to the "small" suite. 

2033 updated = registry.syncDimensionData( 

2034 "tract", 

2035 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

2036 update=True, 

2037 ) 

2038 self.assertEqual(updated, {"region": parent_region_large}) 

2039 registry.insertDimensionData( 

2040 "patch", 

2041 *[ 

2042 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

2043 for n, c in enumerate(child_regions_small) 

2044 ], 

2045 replace=True, 

2046 ) 

2047 # Query again; there now should be no such overlaps, because the 

2048 # database has the "small" suite of regions. 

2049 self.assertFalse( 

2050 set( 

2051 registry.queryDataIds( 

2052 ["tract"], 

2053 skymap=skymap_name, 

2054 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

2055 ) 

2056 ) 

2057 ) 

2058 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

2059 self.assertNotIn( 

2060 patch_id, 

2061 { 

2062 data_id["patch"] 

2063 for data_id in registry.queryDataIds( 

2064 ["patch"], 

2065 skymap=skymap_name, 

2066 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

2067 ) 

2068 }, 

2069 ) 

2070 # Update back to the large regions and query one more time. 

2071 updated = registry.syncDimensionData( 

2072 "tract", 

2073 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

2074 update=True, 

2075 ) 

2076 self.assertEqual(updated, {"region": parent_region_small}) 

2077 registry.insertDimensionData( 

2078 "patch", 

2079 *[ 

2080 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

2081 for n, c in enumerate(child_regions_large) 

2082 ], 

2083 replace=True, 

2084 ) 

2085 self.assertEqual( 

2086 {0}, 

2087 { 

2088 data_id["tract"] 

2089 for data_id in registry.queryDataIds( 

2090 ["tract"], 

2091 skymap=skymap_name, 

2092 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

2093 ) 

2094 }, 

2095 ) 

2096 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

2097 self.assertIn( 

2098 patch_id, 

2099 { 

2100 data_id["patch"] 

2101 for data_id in registry.queryDataIds( 

2102 ["patch"], 

2103 skymap=skymap_name, 

2104 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

2105 ) 

2106 }, 

2107 ) 

2108 
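# ``syncDimensionData(..., update=True)`` returns a mapping of the columns
# it changed to their *previous* values, which is what the assertions above
# check.  A minimal sketch, assuming the "testing_v1" skymap and tract 0
# inserted above; ``new_region`` is any sphgeom region and the function
# name is hypothetical.
def _example_sync_update(registry: Registry, new_region) -> dict | bool:
    """Sketch: update a tract region and capture what changed."""
    updated = registry.syncDimensionData(
        "tract",
        {"skymap": "testing_v1", "id": 0, "region": new_region},
        update=True,
    )
    # Falsy when nothing needed to change; otherwise {"region": old_region}.
    return updated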

2109 def testCalibrationCollections(self): 

2110 """Test operations on `~CollectionType.CALIBRATION` collections, 

2111 including `SqlRegistry.certify`, `SqlRegistry.decertify`, 

2112 `SqlRegistry.findDataset`, and 

2113 `DataCoordinateQueryResults.findRelatedDatasets`. 

2114 """ 

2115 # Setup - make a Registry, fill it with some datasets in 

2116 # non-calibration collections. 

2117 registry = self.makeRegistry() 

2118 self.loadData(registry, "base.yaml") 

2119 self.loadData(registry, "datasets.yaml") 

2120 # Set up some timestamps. 

2121 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

2122 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

2123 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

2124 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

2125 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

2126 allTimespans = [ 

2127 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

2128 ] 

2129 # Insert some exposure records with timespans between each sequential 

2130 # pair of those. 

2131 registry.insertDimensionData( 

2132 "day_obs", {"instrument": "Cam1", "id": 20200101, "timespan": Timespan(t1, t5)} 

2133 ) 

2134 registry.insertDimensionData( 

2135 "group", 

2136 {"instrument": "Cam1", "name": "group0"}, 

2137 {"instrument": "Cam1", "name": "group1"}, 

2138 {"instrument": "Cam1", "name": "group2"}, 

2139 {"instrument": "Cam1", "name": "group3"}, 

2140 ) 

2141 registry.insertDimensionData( 

2142 "exposure", 

2143 { 

2144 "instrument": "Cam1", 

2145 "id": 0, 

2146 "group": "group0", 

2147 "obs_id": "zero", 

2148 "physical_filter": "Cam1-G", 

2149 "day_obs": 20200101, 

2150 "timespan": Timespan(t1, t2), 

2151 }, 

2152 { 

2153 "instrument": "Cam1", 

2154 "id": 1, 

2155 "group": "group1", 

2156 "obs_id": "one", 

2157 "physical_filter": "Cam1-G", 

2158 "day_obs": 20200101, 

2159 "timespan": Timespan(t2, t3), 

2160 }, 

2161 { 

2162 "instrument": "Cam1", 

2163 "id": 2, 

2164 "group": "group2", 

2165 "obs_id": "two", 

2166 "physical_filter": "Cam1-G", 

2167 "day_obs": 20200101, 

2168 "timespan": Timespan(t3, t4), 

2169 }, 

2170 { 

2171 "instrument": "Cam1", 

2172 "id": 3, 

2173 "group": "group3", 

2174 "obs_id": "three", 

2175 "physical_filter": "Cam1-G", 

2176 "day_obs": 20200101, 

2177 "timespan": Timespan(t4, t5), 

2178 }, 

2179 ) 

2180 # Get references to some datasets. 

2181 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

2182 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

2183 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

2184 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

2185 # Register the main calibration collection we'll be working with. 

2186 collection = "Cam1/calibs/default" 

2187 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

2188 # Cannot associate into a calibration collection (no timespan). 

2189 with self.assertRaises(CollectionTypeError): 

2190 registry.associate(collection, [bias2a]) 

2191 # Certify 2a dataset with [t2, t4) validity. 

2192 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

2193 # Test that we can query for this dataset via the new collection, both 

2194 # on its own and with a RUN collection. 

2195 self.assertEqual( 

2196 set(registry.queryDatasets("bias", findFirst=False, collections=collection)), 

2197 {bias2a}, 

2198 ) 

2199 self.assertEqual( 

2200 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])), 

2201 { 

2202 bias2a, 

2203 bias2b, 

2204 bias3b, 

2205 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2206 }, 

2207 ) 

2208 self.assertEqual( 

2209 set(registry.queryDataIds("detector", datasets="bias", collections=collection)), 

2210 {registry.expandDataId(instrument="Cam1", detector=2)}, 

2211 ) 

2212 self.assertEqual( 

2213 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])), 

2214 { 

2215 registry.expandDataId(instrument="Cam1", detector=2), 

2216 registry.expandDataId(instrument="Cam1", detector=3), 

2217 registry.expandDataId(instrument="Cam1", detector=4), 

2218 }, 

2219 ) 

2220 self.assertEqual( 

2221 set( 

2222 registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets( 

2223 "bias", findFirst=True, collections=[collection] 

2224 ) 

2225 ), 

2226 { 

2227 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a), 

2228 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a), 

2229 }, 

2230 ) 

2231 self.assertEqual( 

2232 set( 

2233 registry.queryDataIds( 

2234 ["exposure", "detector"], instrument="Cam1", detector=2 

2235 ).findRelatedDatasets("bias", findFirst=True, collections=[collection, "imported_r"]) 

2236 ), 

2237 { 

2238 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a), 

2239 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a), 

2240 (registry.expandDataId(instrument="Cam1", detector=2, exposure=0), bias2b), 

2241 (registry.expandDataId(instrument="Cam1", detector=2, exposure=3), bias2b), 

2242 }, 

2243 ) 

2244 

2245 # We should not be able to certify 2b with anything overlapping that 

2246 # window. 

2247 with self.assertRaises(ConflictingDefinitionError): 

2248 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

2249 with self.assertRaises(ConflictingDefinitionError): 

2250 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

2251 with self.assertRaises(ConflictingDefinitionError): 

2252 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

2253 with self.assertRaises(ConflictingDefinitionError): 

2254 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

2255 with self.assertRaises(ConflictingDefinitionError): 

2256 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

2257 with self.assertRaises(ConflictingDefinitionError): 

2258 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

2259 with self.assertRaises(ConflictingDefinitionError): 

2260 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

2261 with self.assertRaises(ConflictingDefinitionError): 

2262 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

2263 # We should be able to certify 3a with a range overlapping that window, 

2264 # because it's for a different detector. 

2265 # We'll certify 3a over [t1, t3). 

2266 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

2267 # Now we'll certify 2b and 3b together over [t4, ∞). 

2268 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

2269 

2270 # Fetch all associations and check that they are what we expect. 

2271 self.assertCountEqual( 

2272 list( 

2273 registry.queryDatasetAssociations( 

2274 "bias", 

2275 collections=[collection, "imported_g", "imported_r"], 

2276 ) 

2277 ), 

2278 [ 

2279 DatasetAssociation( 

2280 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

2281 collection="imported_g", 

2282 timespan=None, 

2283 ), 

2284 DatasetAssociation( 

2285 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2286 collection="imported_r", 

2287 timespan=None, 

2288 ), 

2289 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

2290 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

2291 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

2292 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

2293 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

2294 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

2295 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2296 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2297 ], 

2298 ) 

2299 

2300 class Ambiguous: 

2301 """Tag class to denote lookups that should be ambiguous.""" 

2302 

2303 pass 

2304 

2305 def _assertLookup( 

2306 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None 

2307 ) -> None: 

2308 """Local function that asserts that a bias lookup returns the given 

2309 expected result. 

2310 """ 

2311 if expected is Ambiguous: 

2312 with self.assertRaises((DatasetTypeError, LookupError)): 

2313 registry.findDataset( 

2314 "bias", 

2315 collections=collection, 

2316 instrument="Cam1", 

2317 detector=detector, 

2318 timespan=timespan, 

2319 ) 

2320 else: 

2321 self.assertEqual( 

2322 expected, 

2323 registry.findDataset( 

2324 "bias", 

2325 collections=collection, 

2326 instrument="Cam1", 

2327 detector=detector, 

2328 timespan=timespan, 

2329 ), 

2330 ) 

2331 

2332 # Systematically test lookups against expected results. 

2333 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2334 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2335 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2336 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2337 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

2338 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2339 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2340 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2341 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2342 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

2343 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2344 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2345 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2346 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

2347 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2348 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

2349 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

2350 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

2351 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

2352 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2353 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2354 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2355 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2356 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2357 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2358 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

2359 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2360 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2361 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2362 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2363 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

2364 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2365 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2366 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2367 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2368 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2369 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2370 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2371 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2372 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2373 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2374 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2375 

2376 # Test lookups via temporal joins to exposures. 

2377 self.assertEqual( 

2378 set( 

2379 registry.queryDataIds( 

2380 ["exposure", "detector"], instrument="Cam1", detector=2 

2381 ).findRelatedDatasets("bias", collections=[collection]) 

2382 ), 

2383 { 

2384 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a), 

2385 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a), 

2386 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b), 

2387 }, 

2388 ) 

2389 self.assertEqual( 

2390 set( 

2391 registry.queryDataIds( 

2392 ["exposure", "detector"], instrument="Cam1", detector=3 

2393 ).findRelatedDatasets("bias", collections=[collection]) 

2394 ), 

2395 { 

2396 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a), 

2397 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a), 

2398 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b), 

2399 }, 

2400 ) 

2401 self.assertEqual( 

2402 set( 

2403 registry.queryDataIds( 

2404 ["exposure", "detector"], instrument="Cam1", detector=2 

2405 ).findRelatedDatasets("bias", collections=[collection, "imported_g"]) 

2406 ), 

2407 { 

2408 (registry.expandDataId(instrument="Cam1", exposure=0, detector=2), bias2a), 

2409 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a), 

2410 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a), 

2411 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b), 

2412 }, 

2413 ) 

2414 self.assertEqual( 

2415 set( 

2416 registry.queryDataIds( 

2417 ["exposure", "detector"], instrument="Cam1", detector=3 

2418 ).findRelatedDatasets("bias", collections=[collection, "imported_g"]) 

2419 ), 

2420 { 

2421 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a), 

2422 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a), 

2423 (registry.expandDataId(instrument="Cam1", exposure=2, detector=3), bias3a), 

2424 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b), 

2425 }, 

2426 ) 

2427 

2428 # Decertify [t3, t5) for all data IDs, and run the test lookups again.

2429 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2430 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2431 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2432 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2433 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2434 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2435 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2436 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2437 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2438 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2439 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2440 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2441 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2442 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2443 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2444 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2445 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2446 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2447 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2448 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2449 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2450 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2451 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2452 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2453 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2454 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2455 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2456 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2457 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2458 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2459 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2460 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2461 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2462 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2463 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2464 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2465 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2466 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2467 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2468 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2469 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2470 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2471 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2472 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2473 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2474 

2475 # Decertify everything, this time with explicit data IDs, then check 

2476 # that no lookups succeed. 

2477 registry.decertify( 

2478 collection, 

2479 "bias", 

2480 Timespan(None, None), 

2481 dataIds=[ 

2482 dict(instrument="Cam1", detector=2), 

2483 dict(instrument="Cam1", detector=3), 

2484 ], 

2485 ) 

2486 for detector in (2, 3): 

2487 for timespan in allTimespans: 

2488 _assertLookup(detector=detector, timespan=timespan, expected=None) 

2489 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2490 # those. 

2491 registry.certify( 

2492 collection, 

2493 [bias2a, bias3a], 

2494 Timespan(None, None), 

2495 ) 

2496 for timespan in allTimespans: 

2497 _assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2498 _assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2499 # Decertify just bias2 over [t2, t4). 

2500 # This should split a single certification row into two (and leave the 

2501 # other existing row, for bias3a, alone). 

2502 registry.decertify( 

2503 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2504 ) 

2505 for timespan in allTimespans: 

2506 _assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2507 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2508 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2509 if overlapsBefore and overlapsAfter: 

2510 expected = Ambiguous 

2511 elif overlapsBefore or overlapsAfter: 

2512 expected = bias2a 

2513 else: 

2514 expected = None 

2515 _assertLookup(detector=2, timespan=timespan, expected=expected) 

2516 
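# Certify/decertify bookkeeping is interval arithmetic on timespans.  The
# sketch below mirrors the expectation logic at the end of the test above,
# for a single decertified window [t2, t4) cut out of an unbounded
# certification.  Illustrative only; the function name is hypothetical.
def _example_expected_after_decertify(
    timespan: Timespan, t2: astropy.time.Time, t4: astropy.time.Time
) -> str:
    """Sketch: classify a lookup timespan after decertifying [t2, t4)."""
    overlaps_before = timespan.overlaps(Timespan(None, t2))
    overlaps_after = timespan.overlaps(Timespan(t4, None))
    if overlaps_before and overlaps_after:
        return "ambiguous"  # both surviving validity ranges match
    if overlaps_before or overlaps_after:
        return "unique match"
    return "no match"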

2517 def testSkipCalibs(self): 

2518 """Test how queries handle skipping of calibration collections.""" 

2519 registry = self.makeRegistry() 

2520 self.loadData(registry, "base.yaml") 

2521 self.loadData(registry, "datasets.yaml") 

2522 

2523 coll_calib = "Cam1/calibs/default" 

2524 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2525 

2526 # Add all biases to the calibration collection. 

2527 # Without this, the logic that prunes dataset subqueries based on 

2528 # datasetType-collection summary information will fire before the logic 

2529 # we want to test below. This is a good thing (it avoids the dreaded 

2530 # NotImplementedError a bit more often) everywhere but here. 

2531 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2532 

2533 coll_list = [coll_calib, "imported_g", "imported_r"] 

2534 chain = "Cam1/chain" 

2535 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2536 registry.setCollectionChain(chain, coll_list) 

2537 

2538 # explicit list will raise if findFirst=True or there are temporal 

2539 # dimensions 

2540 with self.assertRaises(NotImplementedError): 

2541 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2542 with self.assertRaises(NotImplementedError): 

2543 registry.queryDataIds( 

2544 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2545 ).count() 

2546 

2547 # chain will skip 

2548 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2549 self.assertGreater(len(datasets), 0) 

2550 

2551 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2552 self.assertGreater(len(dataIds), 0) 

2553 

2554 # glob will skip too 

2555 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2556 self.assertGreater(len(datasets), 0) 

2557 

2558 # regular expression will skip too 

2559 pattern = re.compile(".*") 

2560 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2561 self.assertGreater(len(datasets), 0) 

2562 

2563 # ellipsis should work as usual 

2564 datasets = list(registry.queryDatasets("bias", collections=...)) 

2565 self.assertGreater(len(datasets), 0) 

2566 

2567 # A few tests with findFirst

2568 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2569 self.assertGreater(len(datasets), 0) 

2570 

2571 def testIngestTimeQuery(self): 

2572 registry = self.makeRegistry() 

2573 self.loadData(registry, "base.yaml") 

2574 dt0 = datetime.datetime.now(datetime.UTC) 

2575 self.loadData(registry, "datasets.yaml") 

2576 dt1 = datetime.datetime.now(datetime.UTC) 

2577 

2578 datasets = list(registry.queryDatasets(..., collections=...)) 

2579 len0 = len(datasets) 

2580 self.assertGreater(len0, 0) 

2581 

2582 where = "ingest_date > T'2000-01-01'" 

2583 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2584 len1 = len(datasets) 

2585 self.assertEqual(len0, len1) 

2586 

2587 # no one will ever use this piece of software in 30 years 

2588 where = "ingest_date > T'2050-01-01'" 

2589 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2590 len2 = len(datasets) 

2591 self.assertEqual(len2, 0) 

2592 

2593 # Check more exact timing to make sure there is no 37-second offset 

2594 # (after fixing DM-30124). SQLite time precision is 1 second, so make 

2595 # sure that we don't test with higher precision. 

2596 tests = [ 

2597 # format: (timestamp, operator, expected_len) 

2598 (dt0 - timedelta(seconds=1), ">", len0), 

2599 (dt0 - timedelta(seconds=1), "<", 0), 

2600 (dt1 + timedelta(seconds=1), "<", len0), 

2601 (dt1 + timedelta(seconds=1), ">", 0), 

2602 ] 

2603 for dt, op, expect_len in tests: 

2604 dt_str = dt.isoformat(sep=" ") 

2605 

2606 where = f"ingest_date {op} T'{dt_str}'" 

2607 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2608 self.assertEqual(len(datasets), expect_len) 

2609 

2610 # same with bind using datetime or astropy Time 

2611 where = f"ingest_date {op} ingest_time" 

2612 datasets = list( 

2613 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2614 ) 

2615 self.assertEqual(len(datasets), expect_len) 

2616 

2617 dt_astropy = astropy.time.Time(dt, format="datetime") 

2618 datasets = list( 

2619 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2620 ) 

2621 self.assertEqual(len(datasets), expect_len) 

2622 
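    # Editor-added sketch: the two equivalent spellings of the ingest_date
    # cut exercised above -- an inline T'...' literal built from a datetime,
    # and a bind parameter (which also accepts an astropy Time). The helper
    # name is illustrative.
    @staticmethod
    def _sketch_ingest_date_cut(registry, cutoff):
        """Query datasets ingested after ``cutoff`` (a datetime) both ways."""
        inline = list(
            registry.queryDatasets(
                ..., collections=..., where=f"ingest_date > T'{cutoff.isoformat(sep=' ')}'"
            )
        )
        bound = list(
            registry.queryDatasets(
                ..., collections=..., where="ingest_date > cutoff", bind={"cutoff": cutoff}
            )
        )
        return inline, bound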

2623 def testTimespanQueries(self): 

2624 """Test query expressions involving timespans.""" 

2625 registry = self.makeRegistry() 

2626 self.loadData(registry, "hsc-rc2-subset.yaml") 

2627 # All visits in the database; mapping from ID to timespan. 

2628 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2629 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2630 # visit IDs are monotonically increasing). 

2631 ids = sorted(visits.keys()) 

2632 self.assertGreater(len(ids), 20) 

2633 # Pick some quasi-random indexes into `ids` to play with. 

2634 i1 = int(len(ids) * 0.1) 

2635 i2 = int(len(ids) * 0.3) 

2636 i3 = int(len(ids) * 0.6) 

2637 i4 = int(len(ids) * 0.8) 

2638 # Extract some times from those: just before the beginning of i1 (which 

2639 # should be after the end of the previous visit), exactly the 

2640 # beginning of i2, just after the beginning of i3 (and before its end), 

2641 # and the exact end of i4. 

2642 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2643 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2644 t2 = visits[ids[i2]].begin 

2645 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2646 self.assertLess(t3, visits[ids[i3]].end) 

2647 t4 = visits[ids[i4]].end 

2648 # Make sure those are actually in order. 

2649 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2650 

2651 bind = { 

2652 "t1": t1, 

2653 "t2": t2, 

2654 "t3": t3, 

2655 "t4": t4, 

2656 "ts23": Timespan(t2, t3), 

2657 } 

2658 

2659 def query(where): 

2660 """Return results as a sorted, deduplicated list of visit IDs. 

2661 

2662 Parameters 

2663 ---------- 

2664 where : `str` 

2665 The WHERE clause for the query. 

2666 """ 

2667 return sorted( 

2668 { 

2669 dataId["visit"] 

2670 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2671 } 

2672 ) 

2673 

2674 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2675 # where they appear in the expression, and how we get the timespan into 

2676 # the expression. 

2677 

2678 # t1 is before the start of i1, so this should not include i1. 

2679 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2680 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2681 # should not include i2. 

2682 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2683 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2684 # t3 is in the middle of i3, so this should include i3. 

2685 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2686 # This one should not include i3, by the same reasoning. 

2687 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2688 # t4 is exactly at the end of i4, so this should include i4. 

2689 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2690 # i4's upper bound of t4 is exclusive, so this should not include i4. 

2691 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2692 

2693 # Now some timespan vs. time scalar queries. 

2694 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2695 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2696 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2697 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2698 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2699 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2700 

2701 # Empty timespans should not overlap anything. 

2702 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 

2703 
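    # Editor-added sketch of the half-open [begin, end) semantics the
    # assertions above rely on: a Timespan's end bound is exclusive and a
    # None bound is unbounded, so back-to-back timespans never overlap.
    @staticmethod
    def _sketch_half_open_timespans(t2, t3):
        """Demonstrate overlap edge cases for astropy times ``t2 < t3``."""
        assert not Timespan(None, t2).overlaps(Timespan(t2, t3))  # end is exclusive
        assert Timespan(None, None).overlaps(Timespan(t2, t3))  # unbounded overlaps all
        assert Timespan(t2, t3).overlaps(Timespan(t2, t3))  # a range overlaps itself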

2704 def testCollectionSummaries(self): 

2705 """Test recording and retrieval of collection summaries.""" 

2706 self.maxDiff = None 

2707 registry = self.makeRegistry() 

2708 # Importing datasets from yaml should go through the code path where 

2709 # we update collection summaries as we insert datasets. 

2710 self.loadData(registry, "base.yaml") 

2711 self.loadData(registry, "datasets.yaml") 

2712 flat = registry.getDatasetType("flat") 

2713 expected1 = CollectionSummary() 

2714 expected1.dataset_types.add(registry.getDatasetType("bias")) 

2715 expected1.add_data_ids( 

2716 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)] 

2717 ) 

2718 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2719 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2720 # Create a chained collection with both of the imported runs; the 

2721 # summary should be the same, because it's a union with itself. 

2722 chain = "chain" 

2723 registry.registerCollection(chain, CollectionType.CHAINED) 

2724 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2725 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2726 # Associate flats only into a tagged collection and a calibration 

2727 # collection to check summaries of those. 

2728 tag = "tag" 

2729 registry.registerCollection(tag, CollectionType.TAGGED) 

2730 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2731 calibs = "calibs" 

2732 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2733 registry.certify( 

2734 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2735 ) 

2736 expected2 = expected1.copy() 

2737 expected2.dataset_types.discard("bias") 

2738 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2739 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2740 # Explicitly calling SqlRegistry.refresh() should load those same 

2741 # summaries, via a totally different code path. 

2742 registry.refresh() 

2743 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2744 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2745 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2746 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2747 

2748 def testBindInQueryDatasets(self): 

2749 """Test that the bind parameter is correctly forwarded in 

2750 queryDatasets recursion. 

2751 """ 

2752 registry = self.makeRegistry() 

2753 # Importing datasets from yaml should go through the code path where 

2754 # we update collection summaries as we insert datasets. 

2755 self.loadData(registry, "base.yaml") 

2756 self.loadData(registry, "datasets.yaml") 

2757 self.assertEqual( 

2758 set(registry.queryDatasets("flat", band="r", collections=...)), 

2759 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2760 ) 

2761 

2762 def testQueryIntRangeExpressions(self): 

2763 """Test integer range expressions in ``where`` arguments. 

2764 

2765 Note that our expressions use inclusive stop values, unlike Python's. 

2766 """ 

2767 registry = self.makeRegistry() 

2768 self.loadData(registry, "base.yaml") 

2769 self.assertEqual( 

2770 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")), 

2771 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]}, 

2772 ) 

2773 self.assertEqual( 

2774 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")), 

2775 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]}, 

2776 ) 

2777 self.assertEqual( 

2778 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")), 

2779 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]}, 

2780 ) 

2781 
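    # Editor-added sketch: the values an integer range expression like
    # ``detector IN (start..stop:stride)`` is expected to match, per the
    # inclusive-stop convention noted in the docstring above. The helper
    # name is illustrative.
    @staticmethod
    def _sketch_range_expression(start, stop, stride=1):
        """Return the values matched by ``(start..stop:stride)``."""
        return list(range(start, stop + 1, stride))  # e.g. (1..4:2) -> [1, 3]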

2782 def testQueryResultSummaries(self): 

2783 """Test summary methods like `count`, `any`, and `explain_no_results` 

2784 on `DataCoordinateQueryResults` and `DatasetQueryResults`. 

2785 """ 

2786 registry = self.makeRegistry() 

2787 self.loadData(registry, "base.yaml") 

2788 self.loadData(registry, "datasets.yaml") 

2789 self.loadData(registry, "spatial.yaml") 

2790 # Default test dataset has two collections, each with both flats and 

2791 # biases. Add a new collection with only biases. 

2792 registry.registerCollection("biases", CollectionType.TAGGED) 

2793 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2794 # First query yields two results, and involves no postprocessing. 

2795 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2796 self.assertTrue(query1.any(execute=False, exact=False)) 

2797 self.assertTrue(query1.any(execute=True, exact=False)) 

2798 self.assertTrue(query1.any(execute=True, exact=True)) 

2799 self.assertEqual(query1.count(exact=False), 2) 

2800 self.assertEqual(query1.count(exact=True), 2) 

2801 self.assertFalse(list(query1.explain_no_results())) 

2802 # Second query should yield no results, which we should see when 

2803 # we attempt to expand the data ID. 

2804 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2805 # There's no execute=False, exact=False test here because the behavior 

2806 # is not something we want to guarantee in this case (and exact=False 

2807 # says either answer is legal). 

2808 self.assertFalse(query2.any(execute=True, exact=False)) 

2809 self.assertFalse(query2.any(execute=True, exact=True)) 

2810 self.assertEqual(query2.count(exact=False), 0) 

2811 self.assertEqual(query2.count(exact=True), 0) 

2812 self.assertTrue(list(query2.explain_no_results())) 

2813 # These queries yield no results due to various problems that can be 

2814 # spotted prior to execution, yielding helpful diagnostics. 

2815 base_query = registry.queryDataIds(["detector", "physical_filter"]) 

2816 queries_and_snippets = [ 

2817 ( 

2818 # Dataset type name doesn't match any existing dataset types. 

2819 registry.queryDatasets("nonexistent", collections=...), 

2820 ["nonexistent"], 

2821 ), 

2822 ( 

2823 # Dataset type object isn't registered. 

2824 registry.queryDatasets( 

2825 DatasetType( 

2826 "nonexistent", 

2827 dimensions=["instrument"], 

2828 universe=registry.dimensions, 

2829 storageClass="Image", 

2830 ), 

2831 collections=..., 

2832 ), 

2833 ["nonexistent"], 

2834 ), 

2835 ( 

2836 # No datasets of this type in this collection. 

2837 registry.queryDatasets("flat", collections=["biases"]), 

2838 ["flat", "biases"], 

2839 ), 

2840 ( 

2841 # No datasets of this type in this collection. 

2842 base_query.findDatasets("flat", collections=["biases"]), 

2843 ["flat", "biases"], 

2844 ), 

2845 ( 

2846 # No collections matching at all. 

2847 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2848 ["potato"], 

2849 ), 

2850 ] 

2851 with self.assertRaises(MissingDatasetTypeError): 

2852 # Dataset type name doesn't match any existing dataset types. 

2853 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...) 

2854 with self.assertRaises(MissingDatasetTypeError): 

2855 # Dataset type name doesn't match any existing dataset types. 

2856 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...) 

2857 for query, snippets in queries_and_snippets: 

2858 self.assertFalse(query.any(execute=False, exact=False)) 

2859 self.assertFalse(query.any(execute=True, exact=False)) 

2860 self.assertFalse(query.any(execute=True, exact=True)) 

2861 self.assertEqual(query.count(exact=False), 0) 

2862 self.assertEqual(query.count(exact=True), 0) 

2863 messages = list(query.explain_no_results()) 

2864 self.assertTrue(messages) 

2865 # Want all expected snippets to appear in at least one message. 

2866 self.assertTrue( 

2867 any( 

2868 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2869 ), 

2870 messages, 

2871 ) 

2872 

2873 # Wildcards on dataset types are not permitted in queryDataIds. 

2874 with self.assertRaises(DatasetTypeExpressionError): 

2875 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...) 

2876 

2877 # These queries yield no results due to problems that can be identified 

2878 # by cheap follow-up queries, yielding helpful diagnostics. 

2879 for query, snippets in [ 

2880 ( 

2881 # No records for one of the involved dimensions. 

2882 registry.queryDataIds(["subfilter"]), 

2883 ["no rows", "subfilter"], 

2884 ), 

2885 ( 

2886 # No records for one of the involved dimensions. 

2887 registry.queryDimensionRecords("subfilter"), 

2888 ["no rows", "subfilter"], 

2889 ), 

2890 ]: 

2891 self.assertFalse(query.any(execute=True, exact=False)) 

2892 self.assertFalse(query.any(execute=True, exact=True)) 

2893 self.assertEqual(query.count(exact=True), 0) 

2894 messages = list(query.explain_no_results()) 

2895 self.assertTrue(messages) 

2896 # Want all expected snippets to appear in at least one message. 

2897 self.assertTrue( 

2898 any( 

2899 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2900 ), 

2901 messages, 

2902 ) 

2903 

2904 # This query yields four overlaps in the database, but one is filtered 

2905 # out in postprocessing. The count queries aren't accurate because 

2906 # they don't account for duplication that happens due to an internal 

2907 # join against commonSkyPix. 

2908 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2909 self.assertEqual( 

2910 { 

2911 DataCoordinate.standardize( 

2912 instrument="Cam1", 

2913 skymap="SkyMap1", 

2914 visit=v, 

2915 tract=t, 

2916 universe=registry.dimensions, 

2917 ) 

2918 for v, t in [(1, 0), (2, 0), (2, 1)] 

2919 }, 

2920 set(query3), 

2921 ) 

2922 self.assertTrue(query3.any(execute=False, exact=False)) 

2923 self.assertTrue(query3.any(execute=True, exact=False)) 

2924 self.assertTrue(query3.any(execute=True, exact=True)) 

2925 self.assertGreaterEqual(query3.count(exact=False), 4) 

2926 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3) 

2927 self.assertFalse(list(query3.explain_no_results())) 

2928 # This query yields overlaps in the database, but all are filtered 

2929 # out in postprocessing. The count queries again aren't very useful. 

2930 # We have to use `where=` here to avoid an optimization that 

2931 # (currently) skips the spatial postprocess-filtering because it 

2932 # recognizes that no spatial join is necessary. That's not ideal, but 

2933 # fixing it is out of scope for this ticket. 

2934 query4 = registry.queryDataIds( 

2935 ["visit", "tract"], 

2936 instrument="Cam1", 

2937 skymap="SkyMap1", 

2938 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2939 ) 

2940 self.assertFalse(set(query4)) 

2941 self.assertTrue(query4.any(execute=False, exact=False)) 

2942 self.assertTrue(query4.any(execute=True, exact=False)) 

2943 self.assertFalse(query4.any(execute=True, exact=True)) 

2944 self.assertGreaterEqual(query4.count(exact=False), 1) 

2945 self.assertEqual(query4.count(exact=True, discard=True), 0) 

2946 messages = query4.explain_no_results() 

2947 self.assertTrue(messages) 

2948 self.assertTrue(any("overlap" in message for message in messages)) 

2949 # This query should yield results from one dataset type but not the 

2950 # other, which is not registered. 

2951 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"]) 

2952 self.assertTrue(set(query5)) 

2953 self.assertTrue(query5.any(execute=False, exact=False)) 

2954 self.assertTrue(query5.any(execute=True, exact=False)) 

2955 self.assertTrue(query5.any(execute=True, exact=True)) 

2956 self.assertGreaterEqual(query5.count(exact=False), 1) 

2957 self.assertGreaterEqual(query5.count(exact=True), 1) 

2958 self.assertFalse(list(query5.explain_no_results())) 

2959 # This query applies a selection that yields no results, fully in the 

2960 # database. Explaining why it fails involves traversing the relation 

2961 # tree and running a LIMIT 1 query at each level that has the potential 

2962 # to remove rows. 

2963 query6 = registry.queryDimensionRecords( 

2964 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1" 

2965 ) 

2966 self.assertEqual(query6.count(exact=True), 0) 

2967 messages = query6.explain_no_results() 

2968 self.assertTrue(messages) 

2969 self.assertTrue(any("no-purpose" in message for message in messages)) 

2970 
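    # Editor-added sketch of the triage pattern the assertions above encode:
    # run progressively more expensive checks, and ask for "doomed" query
    # diagnostics only once the result is known to be empty.
    @staticmethod
    def _sketch_query_triage(query):
        """Return explanatory messages if ``query`` yields nothing, else []."""
        if query.any(execute=False, exact=False):  # cheap pre-execution check
            if query.any(execute=True, exact=True):  # actually run the query
                return []
        return list(query.explain_no_results())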

2971 def testQueryDataIdsExpressionError(self): 

2972 """Test error checking of 'where' expressions in queryDataIds.""" 

2973 registry = self.makeRegistry() 

2974 self.loadData(registry, "base.yaml") 

2975 bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")} 

2976 with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."): 

2977 registry.queryDataIds(["detector"], where="foo.bar = 12") 

2978 with self.assertRaisesRegex( 

2979 LookupError, "Dimension element name cannot be inferred in this context." 

2980 ): 

2981 registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind) 

2982 

2983 def testQueryDataIdsOrderBy(self): 

2984 """Test order_by and limit on result returned by queryDataIds().""" 

2985 registry = self.makeRegistry() 

2986 self.loadData(registry, "base.yaml") 

2987 self.loadData(registry, "datasets.yaml") 

2988 self.loadData(registry, "spatial.yaml") 

2989 

2990 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

2991 return registry.queryDataIds( 

2992 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

2993 ) 

2994 

2995 Test = namedtuple( 

2996 "testQueryDataIdsOrderByTest", 

2997 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

2998 defaults=(None, None, None), 

2999 ) 

3000 

3001 test_data = ( 

3002 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

3003 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

3004 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

3005 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

3006 Test( 

3007 "tract.id,visit.id", 

3008 "tract,visit", 

3009 ((0, 1), (0, 1), (0, 2)), 

3010 limit=(3,), 

3011 ), 

3012 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

3013 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

3014 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

3015 Test( 

3016 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

3017 ), 

3018 Test( 

3019 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

3020 ), 

3021 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

3022 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

3023 Test( 

3024 "tract,-visit.timespan.begin,visit.timespan.end", 

3025 "tract,visit", 

3026 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

3027 ), 

3028 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

3029 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

3030 Test( 

3031 "tract,detector", 

3032 "tract,detector", 

3033 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

3034 datasets="flat", 

3035 collections="imported_r", 

3036 ), 

3037 Test( 

3038 "tract,detector.full_name", 

3039 "tract,detector", 

3040 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

3041 datasets="flat", 

3042 collections="imported_r", 

3043 ), 

3044 Test( 

3045 "tract,detector.raft,detector.name_in_raft", 

3046 "tract,detector", 

3047 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

3048 datasets="flat", 

3049 collections="imported_r", 

3050 ), 

3051 ) 

3052 

3053 for test in test_data: 

3054 order_by = test.order_by.split(",") 

3055 keys = test.keys.split(",") 

3056 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

3057 if test.limit is not None: 

3058 query = query.limit(*test.limit) 

3059 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

3060 self.assertEqual(dataIds, test.result) 

3061 

3062 # and materialize 

3063 query = do_query(keys).order_by(*order_by) 

3064 if test.limit is not None: 

3065 query = query.limit(*test.limit) 

3066 with self.assertRaises(RelationalAlgebraError): 

3067 with query.materialize(): 

3068 pass 

3069 

3070 # errors in a name 

3071 for order_by in ("", "-"): 

3072 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

3073 list(do_query().order_by(order_by)) 

3074 

3075 for order_by in ("undimension.name", "-undimension.name"): 

3076 with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"): 

3077 list(do_query().order_by(order_by)) 

3078 

3079 for order_by in ("attract", "-attract"): 

3080 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

3081 list(do_query().order_by(order_by)) 

3082 

3083 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

3084 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

3085 

3086 with self.assertRaisesRegex( 

3087 ValueError, 

3088 r"Timespan exists in more than one dimension element \(day_obs, exposure, visit\); " 

3089 r"qualify timespan with specific dimension name\.", 

3090 ): 

3091 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

3092 

3093 with self.assertRaisesRegex( 

3094 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

3095 ): 

3096 list(do_query("tract").order_by("timespan.begin")) 

3097 

3098 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

3099 list(do_query("tract").order_by("tract.timespan.begin")) 

3100 

3101 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

3102 list(do_query("tract").order_by("tract.name")) 

3103 

3104 with self.assertRaisesRegex( 

3105 ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?" 

3106 ): 

3107 list(do_query("visit").order_by("timestamp.begin")) 

3108 
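    # Editor-added sketch of the chaining validated above: a '-' prefix
    # sorts descending, and metadata or timespan fields may be qualified
    # with their dimension element name.
    @staticmethod
    def _sketch_order_by(registry):
        """Return the first three (tract, visit) data IDs, ordered by tract
        ascending and visit start time descending.
        """
        query = registry.queryDataIds(
            ["visit", "tract"], instrument="Cam1", skymap="SkyMap1"
        )
        return list(query.order_by("tract", "-visit.timespan.begin").limit(3))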

3109 def testQueryDataIdsGovernorExceptions(self): 

3110 """Test exceptions raised by queryDataIds() for incorrect governors.""" 

3111 registry = self.makeRegistry() 

3112 self.loadData(registry, "base.yaml") 

3113 self.loadData(registry, "datasets.yaml") 

3114 self.loadData(registry, "spatial.yaml") 

3115 

3116 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs): 

3117 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs) 

3118 

3119 Test = namedtuple( 

3120 "testQueryDataIdExceptionsTest", 

3121 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"), 

3122 defaults=(None, None, None, {}, None, 0), 

3123 ) 

3124 

3125 test_data = ( 

3126 Test("tract,visit", count=6), 

3127 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

3128 Test( 

3129 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError 

3130 ), 

3131 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

3132 Test( 

3133 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError 

3134 ), 

3135 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6), 

3136 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError), 

3137 Test( 

3138 "tract,visit", 

3139 where="instrument=cam AND skymap=map", 

3140 bind={"cam": "Cam1", "map": "SkyMap1"}, 

3141 count=6, 

3142 ), 

3143 Test( 

3144 "tract,visit", 

3145 where="instrument=cam AND skymap=map", 

3146 bind={"cam": "Cam", "map": "SkyMap"}, 

3147 exception=DataIdValueError, 

3148 ), 

3149 ) 

3150 

3151 for test in test_data: 

3152 dimensions = test.dimensions.split(",") 

3153 if test.exception: 

3154 with self.assertRaises(test.exception): 

3155 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() 

3156 else: 

3157 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3158 self.assertEqual(query.count(discard=True), test.count) 

3159 

3160 # and materialize 

3161 if test.exception: 

3162 with self.assertRaises(test.exception): 

3163 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3164 with query.materialize() as materialized: 

3165 materialized.count(discard=True) 

3166 else: 

3167 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3168 with query.materialize() as materialized: 

3169 self.assertEqual(materialized.count(discard=True), test.count) 

3170 
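    # Editor-added sketch: materialize() returns a context manager holding a
    # snapshot of the query results (in a temporary table) that is valid only
    # inside the with block, as exercised in the loop above.
    @staticmethod
    def _sketch_materialized_count(query):
        """Count rows from a materialized snapshot of ``query``."""
        with query.materialize() as materialized:
            return materialized.count(discard=True)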

3171 def testQueryDimensionRecordsOrderBy(self): 

3172 """Test order_by and limit on result returned by 

3173 queryDimensionRecords(). 

3174 """ 

3175 registry = self.makeRegistry() 

3176 self.loadData(registry, "base.yaml") 

3177 self.loadData(registry, "datasets.yaml") 

3178 self.loadData(registry, "spatial.yaml") 

3179 

3180 def do_query(element, datasets=None, collections=None): 

3181 return registry.queryDimensionRecords( 

3182 element, instrument="Cam1", datasets=datasets, collections=collections 

3183 ) 

3184 

3185 query = do_query("detector") 

3186 self.assertEqual(len(list(query)), 4) 

3187 

3188 Test = namedtuple( 

3189 "testQueryDataIdsOrderByTest", 

3190 ("element", "order_by", "result", "limit", "datasets", "collections"), 

3191 defaults=(None, None, None), 

3192 ) 

3193 

3194 test_data = ( 

3195 Test("detector", "detector", (1, 2, 3, 4)), 

3196 Test("detector", "-detector", (4, 3, 2, 1)), 

3197 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)), 

3198 Test("detector", "-detector.purpose", (4,), limit=(1,)), 

3199 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)), 

3200 Test("visit", "visit", (1, 2)), 

3201 Test("visit", "-visit.id", (2, 1)), 

3202 Test("visit", "zenith_angle", (1, 2)), 

3203 Test("visit", "-visit.name", (2, 1)), 

3204 Test("visit", "day_obs,-timespan.begin", (2, 1)), 

3205 ) 

3206 

3207 for test in test_data: 

3208 order_by = test.order_by.split(",") 

3209 query = do_query(test.element).order_by(*order_by) 

3210 if test.limit is not None: 

3211 query = query.limit(*test.limit) 

3212 dataIds = tuple(rec.id for rec in query) 

3213 self.assertEqual(dataIds, test.result) 

3214 

3215 # errors in a name 

3216 for order_by in ("", "-"): 

3217 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

3218 list(do_query("detector").order_by(order_by)) 

3219 

3220 for order_by in ("undimension.name", "-undimension.name"): 

3221 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"): 

3222 list(do_query("detector").order_by(order_by)) 

3223 

3224 for order_by in ("attract", "-attract"): 

3225 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."): 

3226 list(do_query("detector").order_by(order_by)) 

3227 

3228 for order_by in ("timestamp.begin", "-timestamp.begin"): 

3229 with self.assertRaisesRegex( 

3230 ValueError, 

3231 r"Element name mismatch: 'timestamp' instead of 'visit'; " 

3232 r"perhaps you meant 'timespan.begin'\?", 

3233 ): 

3234 list(do_query("visit").order_by(order_by)) 

3235 

3236 def testQueryDimensionRecordsExceptions(self): 

3237 """Test exceptions raised by queryDimensionRecords().""" 

3238 registry = self.makeRegistry() 

3239 self.loadData(registry, "base.yaml") 

3240 self.loadData(registry, "datasets.yaml") 

3241 self.loadData(registry, "spatial.yaml") 

3242 

3243 result = registry.queryDimensionRecords("detector") 

3244 self.assertEqual(result.count(), 4) 

3245 result = registry.queryDimensionRecords("detector", instrument="Cam1") 

3246 self.assertEqual(result.count(), 4) 

3247 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"}) 

3248 self.assertEqual(result.count(), 4) 

3249 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'") 

3250 self.assertEqual(result.count(), 4) 

3251 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"}) 

3252 self.assertEqual(result.count(), 4) 

3253 

3254 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3255 result = registry.queryDimensionRecords("detector", instrument="NotCam1") 

3256 result.count() 

3257 

3258 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3259 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) 

3260 result.count() 

3261 

3262 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3263 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") 

3264 result.count() 

3265 

3266 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3267 result = registry.queryDimensionRecords( 

3268 "detector", where="instrument=instr", bind={"instr": "NotCam1"} 

3269 ) 

3270 result.count() 

3271 

3272 def testDatasetConstrainedDimensionRecordQueries(self): 

3273 """Test that queryDimensionRecords works even when given a dataset 

3274 constraint whose dimensions extend beyond the requested dimension 

3275 element's. 

3276 """ 

3277 registry = self.makeRegistry() 

3278 self.loadData(registry, "base.yaml") 

3279 self.loadData(registry, "datasets.yaml") 

3280 # Query for physical_filter dimension records, using a dataset that 

3281 # has both physical_filter and detector dimensions. 

3282 records = registry.queryDimensionRecords( 

3283 "physical_filter", 

3284 datasets=["flat"], 

3285 collections="imported_r", 

3286 ) 

3287 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"}) 

3288 # Trying to constrain by all dataset types is an error. 

3289 with self.assertRaises(TypeError): 

3290 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r")) 

3291 

3292 def testSkyPixDatasetQueries(self): 

3293 """Test that we can build queries involving skypix dimensions as long 

3294 as a dataset type that uses those dimensions is included. 

3295 """ 

3296 registry = self.makeRegistry() 

3297 self.loadData(registry, "base.yaml") 

3298 dataset_type = DatasetType( 

3299 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int" 

3300 ) 

3301 registry.registerDatasetType(dataset_type) 

3302 run = "r" 

3303 registry.registerRun(run) 

3304 # First try queries where there are no datasets; the concern is whether 

3305 # we can even build and execute these queries without raising, even 

3306 # when "doomed" query shortcuts are in play. 

3307 self.assertFalse( 

3308 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)) 

3309 ) 

3310 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run))) 

3311 # Now add a dataset and see that we can get it back. 

3312 htm7 = registry.dimensions.skypix["htm"][7].pixelization 

3313 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0]) 

3314 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run) 

3315 self.assertEqual( 

3316 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)), 

3317 {data_id}, 

3318 ) 

3319 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref}) 

3320 

3321 def testDatasetIdFactory(self): 

3322 """Simple test for DatasetIdFactory, mostly to catch potential changes 

3323 in its API. 

3324 """ 

3325 registry = self.makeRegistry() 

3326 factory = DatasetIdFactory() 

3327 dataset_type = DatasetType( 

3328 "datasetType", 

3329 dimensions=["detector", "instrument"], 

3330 universe=registry.dimensions, 

3331 storageClass="int", 

3332 ) 

3333 run = "run" 

3334 data_id = DataCoordinate.standardize( 

3335 instrument="Cam1", detector=1, dimensions=dataset_type.dimensions 

3336 ) 

3337 

3338 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE) 

3339 self.assertIsInstance(datasetId, uuid.UUID) 

3340 self.assertEqual(datasetId.version, 4) 

3341 

3342 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE) 

3343 self.assertIsInstance(datasetId, uuid.UUID) 

3344 self.assertEqual(datasetId.version, 5) 

3345 

3346 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN) 

3347 self.assertIsInstance(datasetId, uuid.UUID) 

3348 self.assertEqual(datasetId.version, 5) 

3349 

3350 def testExposureQueries(self): 

3351 """Test query methods using arguments sourced from the exposure log 

3352 service. 

3353 

3354 The most complete test dataset currently available to daf_butler tests 

3355 is the hsc-rc2-subset.yaml export (which is unfortunately distinct from 

3356 the lsst/rc2_subset GitHub repo), but that does not have 'exposure' 

3357 dimension records as it was focused on providing nontrivial spatial 

3358 overlaps between visit+detector and tract+patch. So in this test we 

3359 need to translate queries that originally used the exposure dimension 

3360 to use the (very similar) visit dimension instead. 

3361 """ 

3362 registry = self.makeRegistry() 

3363 self.loadData(registry, "hsc-rc2-subset.yaml") 

3364 self.assertEqual( 

3365 [ 

3366 record.id 

3367 for record in registry.queryDimensionRecords("visit", instrument="HSC") 

3368 .order_by("id") 

3369 .limit(5) 

3370 ], 

3371 [318, 322, 326, 330, 332], 

3372 ) 

3373 self.assertEqual( 

3374 [ 

3375 data_id["visit"] 

3376 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("visit").limit(5) 

3377 ], 

3378 [318, 322, 326, 330, 332], 

3379 ) 

3380 self.assertEqual( 

3381 [ 

3382 record.id 

3383 for record in registry.queryDimensionRecords("detector", instrument="HSC") 

3384 .order_by("full_name") 

3385 .limit(5) 

3386 ], 

3387 [73, 72, 71, 70, 65], 

3388 ) 

3389 self.assertEqual( 

3390 [ 

3391 data_id["detector"] 

3392 for data_id in registry.queryDataIds(["detector"], instrument="HSC") 

3393 .order_by("full_name") 

3394 .limit(5) 

3395 ], 

3396 [73, 72, 71, 70, 65], 

3397 ) 

3398 

3399 def test_long_query_names(self) -> None: 

3400 """Test that queries involving very long names are handled correctly. 

3401 

3402 This is especially important for PostgreSQL, which truncates symbols 

3403 longer than 64 chars, but it's worth testing for all DBs. 

3404 """ 

3405 registry = self.makeRegistry() 

3406 name = "abcd" * 17 

3407 registry.registerDatasetType( 

3408 DatasetType( 

3409 name, 

3410 dimensions=(), 

3411 storageClass="Exposure", 

3412 universe=registry.dimensions, 

3413 ) 

3414 ) 

3415 # We need to search more than one collection that actually contains a 

3416 # matching dataset; otherwise findFirst=True is optimized into a no-op, 

3417 # sidestepping the truncation bugs we want to exercise. 

3418 run1 = "run1" 

3419 registry.registerRun(run1) 

3420 run2 = "run2" 

3421 registry.registerRun(run2) 

3422 (ref1,) = registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run1) 

3423 registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run2) 

3424 self.assertEqual( 

3425 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)), 

3426 {ref1}, 

3427 ) 

3428 

3429 def test_skypix_constraint_queries(self) -> None: 

3430 """Test queries spatially constrained by a skypix data ID.""" 

3431 registry = self.makeRegistry() 

3432 self.loadData(registry, "hsc-rc2-subset.yaml") 

3433 patch_regions = { 

3434 (data_id["tract"], data_id["patch"]): data_id.region 

3435 for data_id in registry.queryDataIds(["patch"]).expanded() 

3436 } 

3437 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"] 

3438 # This check ensures the test doesn't become trivial due to a config 

3439 # change; if it does, just pick a different HTM level. 

3440 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix) 

3441 # Gather all skypix IDs that definitely overlap at least one of these 

3442 # patches. 

3443 relevant_skypix_ids = lsst.sphgeom.RangeSet() 

3444 for patch_region in patch_regions.values(): 

3445 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region) 

3446 # Look for a "nontrivial" skypix_id that overlaps at least one patch 

3447 # and does not overlap at least one other patch. 

3448 for skypix_id in itertools.chain.from_iterable( 

3449 range(begin, end) for begin, end in relevant_skypix_ids 

3450 ): 

3451 skypix_region = skypix_dimension.pixelization.pixel(skypix_id) 

3452 overlapping_patches = { 

3453 patch_key 

3454 for patch_key, patch_region in patch_regions.items() 

3455 if not patch_region.isDisjointFrom(skypix_region) 

3456 } 

3457 if overlapping_patches and overlapping_patches != patch_regions.keys(): 

3458 break 

3459 else: 

3460 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.") 

3461 self.assertEqual( 

3462 { 

3463 (data_id["tract"], data_id["patch"]) 

3464 for data_id in registry.queryDataIds( 

3465 ["patch"], 

3466 dataId={skypix_dimension.name: skypix_id}, 

3467 ) 

3468 }, 

3469 overlapping_patches, 

3470 ) 

3471 # Test that a three-way join that includes the common skypix system in 

3472 # the dimensions doesn't generate redundant join terms in the query. 

3473 full_data_ids = set( 

3474 registry.queryDataIds( 

3475 ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC" 

3476 ).expanded() 

3477 ) 

3478 self.assertGreater(len(full_data_ids), 0) 

3479 for data_id in full_data_ids: 

3480 self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region)) 

3481 self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region)) 

3482 

3483 def test_spatial_constraint_queries(self) -> None: 

3484 """Test queries in which one spatial dimension in the constraint (data 

3485 ID or ``where`` string) constrains a different spatial dimension in the 

3486 query result columns. 

3487 """ 

3488 registry = self.makeRegistry() 

3489 self.loadData(registry, "hsc-rc2-subset.yaml") 

3490 patch_regions = { 

3491 (data_id["tract"], data_id["patch"]): data_id.region 

3492 for data_id in registry.queryDataIds(["patch"]).expanded() 

3493 } 

3494 observation_regions = { 

3495 (data_id["visit"], data_id["detector"]): data_id.region 

3496 for data_id in registry.queryDataIds(["visit", "detector"]).expanded() 

3497 } 

3498 all_combos = { 

3499 (patch_key, observation_key) 

3500 for patch_key, observation_key in itertools.product(patch_regions, observation_regions) 

3501 } 

3502 overlapping_combos = { 

3503 (patch_key, observation_key) 

3504 for patch_key, observation_key in all_combos 

3505 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key]) 

3506 } 

3507 # Check a direct spatial join with no constraint first. 

3508 self.assertEqual( 

3509 { 

3510 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"])) 

3511 for data_id in registry.queryDataIds(["patch", "visit", "detector"]) 

3512 }, 

3513 overlapping_combos, 

3514 ) 

3515 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set) 

3516 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set) 

3517 for patch_key, observation_key in overlapping_combos: 

3518 overlaps_by_patch[patch_key].add(observation_key) 

3519 overlaps_by_observation[observation_key].add(patch_key) 

3520 # Find a patch and an observation that each overlap at least one, but 

3521 # not all, of the other set. 

3522 nontrivial_patch = next( 

3523 iter( 

3524 patch_key 

3525 for patch_key, observation_keys in overlaps_by_patch.items() 

3526 if observation_keys and observation_keys != observation_regions.keys() 

3527 ) 

3528 ) 

3529 nontrivial_observation = next( 

3530 iter( 

3531 observation_key 

3532 for observation_key, patch_keys in overlaps_by_observation.items() 

3533 if patch_keys and patch_keys != patch_regions.keys() 

3534 ) 

3535 ) 

3536 # Use the nontrivial patches and observations as constraints on the 

3537 # other dimensions in various ways, first via a 'where' expression. 

3538 # It's better in general to use 'bind' instead of f-strings, but these 

3539 # are all integers so there are no quoting concerns. 

3540 self.assertEqual( 

3541 { 

3542 (data_id["visit"], data_id["detector"]) 

3543 for data_id in registry.queryDataIds( 

3544 ["visit", "detector"], 

3545 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}", 

3546 skymap="hsc_rings_v1", 

3547 ) 

3548 }, 

3549 overlaps_by_patch[nontrivial_patch], 

3550 ) 

3551 self.assertEqual( 

3552 { 

3553 (data_id["tract"], data_id["patch"]) 

3554 for data_id in registry.queryDataIds( 

3555 ["patch"], 

3556 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}", 

3557 instrument="HSC", 

3558 ) 

3559 }, 

3560 overlaps_by_observation[nontrivial_observation], 

3561 ) 

3562 # and then via the dataId argument. 

3563 self.assertEqual( 

3564 { 

3565 (data_id["visit"], data_id["detector"]) 

3566 for data_id in registry.queryDataIds( 

3567 ["visit", "detector"], 

3568 dataId={ 

3569 "tract": nontrivial_patch[0], 

3570 "patch": nontrivial_patch[1], 

3571 }, 

3572 skymap="hsc_rings_v1", 

3573 ) 

3574 }, 

3575 overlaps_by_patch[nontrivial_patch], 

3576 ) 

3577 self.assertEqual( 

3578 { 

3579 (data_id["tract"], data_id["patch"]) 

3580 for data_id in registry.queryDataIds( 

3581 ["patch"], 

3582 dataId={ 

3583 "visit": nontrivial_observation[0], 

3584 "detector": nontrivial_observation[1], 

3585 }, 

3586 instrument="HSC", 

3587 ) 

3588 }, 

3589 overlaps_by_observation[nontrivial_observation], 

3590 ) 

3591 

3592 def test_query_projection_drop_postprocessing(self) -> None: 

3593 """Test that projections and deduplications on query objects can 

3594 drop post-query region filtering to ensure the query remains in 

3595 the SQL engine. 

3596 """ 

3597 registry = self.makeRegistry() 

3598 self.loadData(registry, "base.yaml") 

3599 self.loadData(registry, "spatial.yaml") 

3600 

3601 def pop_transfer(tree: Relation) -> Relation: 

3602 """If a relation tree terminates with a transfer to a new engine, 

3603 return the relation prior to that transfer. If not, return the 

3604 original relation. 

3605 

3606 Parameters 

3607 ---------- 

3608 tree : `Relation` 

3609 The relation tree to modify. 

3610 """ 

3611 match tree: 

3612 case Transfer(target=target): 

3613 return target 

3614 case _: 

3615 return tree 

3616 

3617 # There's no public way to get a Query object yet, so we get one from a 

3618 # DataCoordinateQueryResults private attribute. When a public API is 

3619 # available this test should use it. 

3620 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query 

3621 # We expect this query to terminate in the iteration engine originally, 

3622 # because region-filtering is necessary. 

3623 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine) 

3624 # If we deduplicate, we usually have to do that downstream of the 

3625 # filtering. That means the deduplication has to happen in the 

3626 # iteration engine. 

3627 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine) 

3628 # If we pass drop_postprocessing, we instead drop the region filtering 

3629 # so the deduplication can happen in SQL (though there might still be 

3630 # transfer to iteration at the tail of the tree that we can ignore; 

3631 # that's what the pop_transfer takes care of here). 

3632 self.assertIsInstance( 

3633 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine, 

3634 sql.Engine, 

3635 ) 

3636 

3637 def test_query_find_datasets_drop_postprocessing(self) -> None: 

3638 """Test that DataCoordinateQueryResults.findDatasets avoids commutator 

3639 problems with the FindFirstDataset relation operation. 

3640 """ 

3641 # Setup: load some visit, tract, and patch records, and insert two 

3642 # datasets with dimensions {visit, patch}, with one in each of two 

3643 # RUN collections. 

3644 registry = self.makeRegistry() 

3645 self.loadData(registry, "base.yaml") 

3646 self.loadData(registry, "spatial.yaml") 

3647 storage_class = StorageClass("Warpy") 

3648 registry.storageClasses.registerStorageClass(storage_class) 

3649 dataset_type = DatasetType( 

3650 "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions 

3651 ) 

3652 registry.registerDatasetType(dataset_type) 

3653 (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1) 

3654 registry.registerRun("run1") 

3655 registry.registerRun("run2") 

3656 (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1") 

3657 (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2") 

3658 # Query for the dataset using queryDataIds(...).findDatasets(...) 

3659 # against only one of the two collections. This should work even 

3660 # though the relation returned by queryDataIds ends with 

3661 # iteration-engine region-filtering, because we can recognize before 

3662 # running the query that there is only one collection to search and 

3663 # hence the (default) findFirst=True is irrelevant, and joining in the 

3664 # dataset query commutes past the iteration-engine postprocessing. 

3665 query1 = registry.queryDataIds( 

3666 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"] 

3667 ) 

3668 self.assertEqual( 

3669 set(query1.findDatasets(dataset_type.name, collections=["run1"])), 

3670 {ref1}, 

3671 ) 

3672 # Query for the dataset using queryDataIds(...).findDatasets(...) 

3673 # against both collections. This can only work if the FindFirstDataset 

3674 # operation can be commuted past the iteration-engine postprocessing into SQL. 

3675 query2 = registry.queryDataIds( 

3676 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"] 

3677 ) 

3678 self.assertEqual( 

3679 set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])), 

3680 {ref2}, 

3681 ) 

3682 

3683 def test_query_empty_collections(self) -> None: 

3684 """Test for registry query methods with empty collections. The methods 

3685 should return an empty result set (or None when applicable) and provide 

3686 "doomed" diagnostics. 

3687 """ 

3688 registry = self.makeRegistry() 

3689 self.loadData(registry, "base.yaml") 

3690 self.loadData(registry, "datasets.yaml") 

3691 

3692 # Tests for registry.findDataset() 

3693 with self.assertRaises(NoDefaultCollectionError): 

3694 registry.findDataset("bias", instrument="Cam1", detector=1) 

3695 self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...)) 

3696 self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[])) 

3697 

3698 # Tests for registry.queryDatasets() 

3699 with self.assertRaises(NoDefaultCollectionError): 

3700 registry.queryDatasets("bias") 

3701 self.assertTrue(list(registry.queryDatasets("bias", collections=...))) 

3702 

3703 result = registry.queryDatasets("bias", collections=[]) 

3704 self.assertEqual(len(list(result)), 0) 

3705 messages = list(result.explain_no_results()) 

3706 self.assertTrue(messages) 

3707 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3708 

3709 # Tests for registry.queryDataIds() 

3710 with self.assertRaises(NoDefaultCollectionError): 

3711 registry.queryDataIds("detector", datasets="bias") 

3712 self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...))) 

3713 

3714 result = registry.queryDataIds("detector", datasets="bias", collections=[]) 

3715 self.assertEqual(len(list(result)), 0) 

3716 messages = list(result.explain_no_results()) 

3717 self.assertTrue(messages) 

3718 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3719 

3720 # Tests for registry.queryDimensionRecords() 

3721 with self.assertRaises(NoDefaultCollectionError): 

3722 registry.queryDimensionRecords("detector", datasets="bias") 

3723 self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...))) 

3724 

3725 result = registry.queryDimensionRecords("detector", datasets="bias", collections=[]) 

3726 self.assertEqual(len(list(result)), 0) 

3727 messages = list(result.explain_no_results()) 

3728 self.assertTrue(messages) 

3729 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3730 

3731 def test_dataset_followup_spatial_joins(self) -> None: 

3732 """Test queryDataIds(...).findRelatedDatasets(...) where a spatial join 

3733 is involved. 

3734 """ 

3735 registry = self.makeRegistry() 

3736 self.loadData(registry, "base.yaml") 

3737 self.loadData(registry, "spatial.yaml") 

3738 pvi_dataset_type = DatasetType( 

3739 "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions 

3740 ) 

3741 registry.registerDatasetType(pvi_dataset_type) 

3742 collection = "datasets" 

3743 registry.registerRun(collection) 

3744 (pvi1,) = registry.insertDatasets( 

3745 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 1}], run=collection 

3746 ) 

3747 (pvi2,) = registry.insertDatasets( 

3748 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 2}], run=collection 

3749 ) 

3750 (pvi3,) = registry.insertDatasets( 

3751 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 3}], run=collection 

3752 ) 

3753 self.assertEqual( 

3754 set( 

3755 registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0) 

3756 .expanded() 

3757 .findRelatedDatasets("pvi", [collection]) 

3758 ), 

3759 { 

3760 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi1), 

3761 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi2), 

3762 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=1), pvi2), 

3763 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi1), 

3764 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi2), 

3765 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi3), 

3766 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=3), pvi2), 

3767 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=4), pvi3), 

3768 }, 

3769 )