# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from ... import ddl

__all__ = ["RegistryTests"]

import datetime
import itertools
import os
import re
import time
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Callable, Iterator
from concurrent.futures import ThreadPoolExecutor
from datetime import timedelta
from threading import Barrier

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ..._dataset_association import DatasetAssociation
from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef
from ..._dataset_type import DatasetType
from ..._exceptions import (
    CollectionTypeError,
    DataIdValueError,
    InconsistentDataIdError,
    InvalidQueryError,
    MissingCollectionError,
    MissingDatasetTypeError,
)
from ..._exceptions_legacy import DatasetTypeError
from ..._storage_class import StorageClass
from ..._timespan import Timespan
from ...dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension
from .._collection_summary import CollectionSummary
from .._collection_type import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    ConflictingDefinitionError,
    DatasetTypeExpressionError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from .._registry import Registry
from ..interfaces import ButlerAttributeExistsError
from ..sql_registry import SqlRegistry


class RegistryTests(ABC):
    """Generic tests for the `SqlRegistry` class that can be subclassed to
    generate tests for different configurations.
    """
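    # A concrete subclass supplies the backend; a minimal sketch (names and
    # config values here are hypothetical, assuming a SQLite-backed repo):
    #
    #     class SqliteRegistryTests(RegistryTests, unittest.TestCase):
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(TESTDIR, "data", "registry")
    #
    #         def makeRegistry(self, share_repo_with=None):
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"
    #             return SqlRegistry.createFromConfig(config)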

    collectionsManager: str | None = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: str | dict[str, str] | None = None
    """Name or configuration dictionary of the datasets manager class. If a
    subclass provides a value for this member, it overrides the name specified
    in the default configuration (`str` or `dict`).
    """

    supportsCollectionRegex: bool = True
    """True if the registry class being tested supports regex searches for
    collections."""

    supportsDetailedQueryExplain: bool = True
    """True if the registry class being tested can generate detailed
    explanations for queries that return no rows by running additional queries
    to diagnose the problem.
    """

    supportsQueryOffset: bool = True
    """True if the registry class being tested supports the 'offset' parameter
    to query methods.
    """

    supportsQueryGovernorValidation: bool = True
    """True if the registry class being tested validates that values provided
    by the user for governor dimensions are correct before running queries.
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        Returned instance will be pre-configured based on the values of class
        members, and default-configured for all other parameters. Subclasses
        that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry | None:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with`
            is not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()

    def loadData(self, registry: SqlRegistry, filename: str) -> None:
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to load into.
        filename : `str`
            The name of the file to load.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())

    def testOpaque(self):
        """Tests for `SqlRegistry.registerOpaqueTable`,
        `SqlRegistry.insertOpaqueData`, `SqlRegistry.fetchOpaqueData`, and
        `SqlRegistry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test very long IN clause which exceeds sqlite limit on number of
        # parameters. SQLite says the limit is 32k but it looks like it is
        # much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than 1k batch size: the first with
        # duplicates, the second with matching elements in different batches
        # (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `SqlRegistry.registerDatasetType` and
        `SqlRegistry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        differentDimensions = registry.dimensions.conform(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

        # Test some basic queryDatasetTypes functionality
        missing: list[str] = []
        types = registry.queryDatasetTypes(["te*", "notarealdatasettype"], missing=missing)
        self.assertCountEqual([dt.name for dt in types], ["test", "testNoneTemplate"])
        self.assertEqual(missing, ["notarealdatasettype"])

    def testDimensions(self):
        """Tests for `SqlRegistry.insertDimensionData`,
        `SqlRegistry.syncDimensionData`, and `SqlRegistry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", dimensions=dimension.minimal_group)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, dimensions=dimension.minimal_group)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i", dimensions=dimension2.minimal_group
            )
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("day_obs", {"instrument": "DummyCam", "id": 20250101}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            (
                "visit",
                {
                    "instrument": "DummyCam",
                    "id": 42,
                    "name": "fortytwo",
                    "physical_filter": "d-r",
                    "day_obs": 20250101,
                },
            ),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `SqlRegistry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "day_obs",
            {"instrument": "Cam1", "id": 20250101},
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group1"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 1,
                "obs_id": "one",
                "physical_filter": "Cam1-G",
                "group": "group1",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group2"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 2,
                "obs_id": "two",
                "physical_filter": "Cam1-G",
                "group": "group2",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "day_obs": 20250101},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `SqlRegistry.insertDatasets`,
        `SqlRegistry.getDataset`, and `SqlRegistry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `SqlRegistry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, in which two have the right
        # dataset type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that SqlRegistry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that SqlRegistry.removeDatasetType raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(DatasetTypeError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `SqlRegistry._importDatasets` with UUID dataset ID."""
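        # These checks exercise UUID-specific behavior, so skip unless the
        # configured datasets manager is the UUID-based one.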

        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing same DatasetRef with different dataset ID is an error
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs, they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make dataset ref with reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
                self.assertEqual(ref1.id, ref.id)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        Components can no longer be found by registry. This test checks
        that this now fails.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        with self.assertRaises(DatasetTypeError):
            registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})

        if self.supportsCollectionRegex:
            # Query for collections matching a regex.
            self.assertCountEqual(
                list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
                ["imported_r", "imported_g"],
            )
            # Query for collections matching a regex or an explicit str.
            self.assertCountEqual(
                list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
                ["imported_r", "imported_g", "chain1"],
            )
        # Same queries as the regex ones above, but using globs instead of
        # regex.
        self.assertCountEqual(
            list(registry.queryCollections("imported_*", flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a glob or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections(["imported_*", "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )

        # Search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run1.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2. That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainCaching(self):
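        """Test that a collection chain can be modified while the collection
        cache (``caching_context``) is active; guards against the regression
        described in DM-43750.
        """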

        registry = self.makeRegistry()
        with registry.caching_context():
            registry.registerCollection("a")
            registry.registerCollection("chain", CollectionType.CHAINED)
            # There used to be a caching bug (DM-43750) that would throw an
            # exception if you modified a collection chain for a collection
            # that was already in the cache.
            registry.setCollectionChain("chain", ["a"])
            self.assertEqual(list(registry.getCollectionChain("chain")), ["a"])

    def testCollectionChainFlatten(self):
        """Test that `SqlRegistry.setCollectionChain` obeys its 'flatten'
        option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testCollectionChainPrependConcurrency(self):
        """Verify that locking via database row locks is working as
        expected.
        """

        def blocked_thread_func(registry: SqlRegistry):
            # This call will become blocked after it has decided on positions
            # for the new children in the collection chain, but before
            # inserting them.
            registry._managers.collections.prepend_collection_chain("chain", ["a"])

        def unblocked_thread_func(registry: SqlRegistry):
            registry._managers.collections.prepend_collection_chain("chain", ["b"])

        registry = self._do_collection_concurrency_test(blocked_thread_func, unblocked_thread_func)

        # blocked_thread_func should have finished first, inserting "a".
        # unblocked_thread_func should have finished second, prepending "b".
        self.assertEqual(("b", "a"), registry.getCollectionChain("chain"))

    def testCollectionChainReplaceConcurrency(self):
        """Verify that locking via database row locks is working as
        expected.
        """

        def blocked_thread_func(registry: SqlRegistry):
            # This call will become blocked after deleting children, but before
            # inserting new ones.
            registry.setCollectionChain("chain", ["a"])

        def unblocked_thread_func(registry: SqlRegistry):
            registry.setCollectionChain("chain", ["b"])

        registry = self._do_collection_concurrency_test(blocked_thread_func, unblocked_thread_func)

        # blocked_thread_func should have finished first.
        # unblocked_thread_func should have finished second, overwriting the
        # chain with "b".
        self.assertEqual(("b",), registry.getCollectionChain("chain"))

    def _do_collection_concurrency_test(
        self,
        blocked_thread_func: Callable[[SqlRegistry], None],
        unblocked_thread_func: Callable[[SqlRegistry], None],
    ) -> SqlRegistry:
        # This function:
        # 1. Sets up two registries pointing at the same database.
        # 2. Starts running 'blocked_thread_func' in a background thread,
        #    arranging for it to become blocked during a critical section in
        #    the collections manager.
        # 3. Waits for 'blocked_thread_func' to reach the critical section.
        # 4. Starts running 'unblocked_thread_func'.
        # 5. Allows both functions to run to completion.

        # Set up two registries pointing to the same DB
        registry1 = self.makeRegistry()
        assert isinstance(registry1, SqlRegistry)
        registry2 = self.makeRegistry(share_repo_with=registry1)
        if registry2 is None:
            # This will happen for in-memory SQL databases.
            raise unittest.SkipTest("Testing concurrency requires two connections to the same DB.")

        registry1.registerCollection("chain", CollectionType.CHAINED)
        for collection in ["a", "b"]:
            registry1.registerCollection(collection)

        # Arrange for registry1 to block during its critical section, allowing
        # us to detect this and control when it becomes unblocked.
        enter_barrier = Barrier(2, timeout=60)
        exit_barrier = Barrier(2, timeout=60)

        def wait_for_barrier():
            enter_barrier.wait()
            exit_barrier.wait()

        registry1._managers.collections._block_for_concurrency_test = wait_for_barrier

        with ThreadPoolExecutor(max_workers=1) as exec1:
            with ThreadPoolExecutor(max_workers=1) as exec2:
                future1 = exec1.submit(blocked_thread_func, registry1)
                enter_barrier.wait()

                # At this point registry 1 has entered the critical section and
                # is waiting for us to release it. Start the other thread.
                future2 = exec2.submit(unblocked_thread_func, registry2)
                # thread2 should block inside a database call, but we have no
                # way to detect when it is in this state.
                time.sleep(0.200)

                # Let the threads run to completion.
                exit_barrier.wait()
                future1.result()
                future2.result()

        return registry1

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 (rolled back) and Cam3 (never inserted) should both not
        # exist.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, dimensions=dimension.minimal_group))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, dimensions=dimension.minimal_group)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap.
        """
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData("day_obs", dict(instrument="DummyCam", id=20250101))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", day_obs=20250101),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", day_obs=20250101),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", day_obs=20250101),
        )
        registry.insertDimensionData(
            "group",
            dict(instrument="DummyCam", name="ten"),
            dict(instrument="DummyCam", name="eleven"),
            dict(instrument="DummyCam", name="twelve"),
        )
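        # Give every visit a visit_detector_region record for each of the
        # five detectors.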

        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(
                instrument="DummyCam",
                id=100,
                obs_id="100",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=101,
                obs_id="101",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=110,
                obs_id="110",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=111,
                obs_id="111",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=200,
                obs_id="200",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=201,
                obs_id="201",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit=10),
            dict(instrument="DummyCam", exposure=101, visit=10),
            dict(instrument="DummyCam", exposure=110, visit=11),
            dict(instrument="DummyCam", exposure=111, visit=11),
            dict(instrument="DummyCam", exposure=200, visit=20),
            dict(instrument="DummyCam", exposure=201, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.conform(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.conform(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])
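
        # Combined dimension group for the queries below: the union of the
        # required dimensions of the raw and calexp dataset types.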

        dimensions = registry.dimensions.conform(
            rawType.dimensions.required.names | calexpType.dimensions.required.names
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter: it is not in `dimensions`, but it is
        # part of the full expression, so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

1254 def testSkyMapDimensions(self): 

1255 """Tests involving only skymap dimensions, no joins to instrument.""" 

1256 registry = self.makeRegistry() 

1257 

1258 # need a bunch of dimensions and datasets for test, we want 

1259 # "band" in the test so also have to add physical_filter 

1260 # dimensions 

1261 registry.insertDimensionData("instrument", dict(instrument="DummyCam")) 

1262 registry.insertDimensionData( 

1263 "physical_filter", 

1264 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1265 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1266 ) 

1267 registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!")) 

1268 for tract in range(10): 

1269 registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract)) 

1270 registry.insertDimensionData( 

1271 "patch", 

1272 *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)], 

1273 ) 

1274 

1275 # dataset types 

1276 run = "tésτ" 

1277 registry.registerRun(run) 

1278 storageClass = StorageClass("testDataset") 

1279 registry.storageClasses.registerStorageClass(storageClass) 

1280 calexpType = DatasetType( 

1281 name="deepCoadd_calexp", 

1282 dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")), 

1283 storageClass=storageClass, 

1284 ) 

1285 registry.registerDatasetType(calexpType) 

1286 mergeType = DatasetType( 

1287 name="deepCoadd_mergeDet", 

1288 dimensions=registry.dimensions.conform(("skymap", "tract", "patch")), 

1289 storageClass=storageClass, 

1290 ) 

1291 registry.registerDatasetType(mergeType) 

1292 measType = DatasetType( 

1293 name="deepCoadd_meas", 

1294 dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")), 

1295 storageClass=storageClass, 

1296 ) 

1297 registry.registerDatasetType(measType) 

1298 

1299 dimensions = registry.dimensions.conform( 

1300 calexpType.dimensions.required.names 

1301 | mergeType.dimensions.required.names 

1302 | measType.dimensions.required.names 

1303 ) 

1304 

1305 # add pre-existing datasets 

1306 for tract in (1, 3, 5): 

1307 for patch in (2, 4, 6, 7): 

1308 dataId = dict(skymap="DummyMap", tract=tract, patch=patch) 

1309 registry.insertDatasets(mergeType, dataIds=[dataId], run=run) 

1310 for aFilter in ("i", "r"): 

1311 dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter) 

1312 registry.insertDatasets(calexpType, dataIds=[dataId], run=run) 

1313 

1314 # with empty expression 

1315 rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet() 

1316 self.assertEqual(len(rows), 3 * 4 * 2) # 3 tracts x 4 patches x 2 filters

1317 for dataId in rows: 

1318 self.assertCountEqual(dataId.dimensions.required, ("skymap", "tract", "patch", "band")) 

1319 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5)) 

1320 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7)) 

1321 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r")) 

1322 

1323 # limit to 2 tracts and 2 patches 

1324 rows = registry.queryDataIds( 

1325 dimensions, 

1326 datasets=[calexpType, mergeType], 

1327 collections=run, 

1328 where="tract IN (1, 5) AND patch IN (2, 7)", 

1329 skymap="DummyMap", 

1330 ).toSet() 

1331 self.assertEqual(len(rows), 2 * 2 * 2) # 2 tracts x 2 patches x 2 filters 

1332 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5)) 

1333 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7)) 

1334 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r")) 

1335 

1336 # limit to single filter 

1337 rows = registry.queryDataIds( 

1338 dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'" 

1339 ).toSet() 

1340 self.assertEqual(len(rows), 3 * 4 * 1) # 3 tracts x 4 patches x 1 filter

1341 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5)) 

1342 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7)) 

1343 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",)) 

1344 

1345 # Specifying a non-existent skymap raises an exception

1346 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

1347 rows = registry.queryDataIds( 

1348 dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'" 

1349 ).toSet() 

1350 

1351 def testSpatialJoin(self): 

1352 """Test queries that involve spatial overlap joins.""" 

1353 registry = self.makeRegistry() 

1354 self.loadData(registry, "hsc-rc2-subset.yaml") 

1355 

1356 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of 

1357 # the TopologicalFamily they belong to. We'll relate all elements in 

1358 # each family to all of the elements in each other family. 

1359 families = defaultdict(set) 

1360 # Dictionary of {element.name: {dataId: region}}. 

1361 regions = {} 

1362 for element in registry.dimensions.database_elements: 

1363 if element.spatial is not None: 

1364 families[element.spatial.name].add(element) 

1365 regions[element.name] = { 

1366 record.dataId: record.region for record in registry.queryDimensionRecords(element) 

1367 } 

1368 

1369 # If this check fails, it's not necessarily a problem - it may just be 

1370 # a reasonable change to the default dimension definitions - but the 

1371 # test below depends on there being more than one family to do anything 

1372 # useful. 

1373 self.assertEqual(len(families), 2) 

1374 

1375 # Overlap DatabaseDimensionElements with each other. 

1376 for family1, family2 in itertools.combinations(families, 2): 

1377 for element1, element2 in itertools.product(families[family1], families[family2]): 

1378 dimensions = element1.minimal_group | element2.minimal_group 

1379 # Construct expected set of overlapping data IDs via a 

1380 # brute-force comparison of the regions we've already fetched. 

1381 expected = { 

1382 DataCoordinate.standardize( 

1383 {**dataId1.required, **dataId2.required}, dimensions=dimensions 

1384 ) 

1385 for (dataId1, region1), (dataId2, region2) in itertools.product( 

1386 regions[element1.name].items(), regions[element2.name].items() 

1387 ) 

1388 if not region1.isDisjointFrom(region2) 

1389 } 

1390 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1391 queried = set(registry.queryDataIds(dimensions)) 

1392 self.assertEqual(expected, queried) 

1393 

1394 # Overlap each DatabaseDimensionElement with the commonSkyPix system. 

1395 commonSkyPix = registry.dimensions.commonSkyPix 

1396 for elementName, these_regions in regions.items(): 

1397 dimensions = registry.dimensions[elementName].minimal_group | commonSkyPix.minimal_group 

1398 expected = set() 

1399 for dataId, region in these_regions.items(): 

1400 for begin, end in commonSkyPix.pixelization.envelope(region): 

1401 expected.update( 

1402 DataCoordinate.standardize( 

1403 {commonSkyPix.name: index, **dataId.required}, dimensions=dimensions 

1404 ) 

1405 for index in range(begin, end) 

1406 ) 

1407 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1408 queried = set(registry.queryDataIds(dimensions)) 

1409 self.assertEqual(expected, queried) 

1410 
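# A standalone sketch (not executed by the tests) of the brute-force overlap
# check testSpatialJoin builds its expectations from: two sphgeom regions
# "overlap" exactly when neither is disjoint from the other. The trixel
# indices below are arbitrary but valid for their levels.
import lsst.sphgeom

_htm5 = lsst.sphgeom.HtmPixelization(5)
_htm6 = lsst.sphgeom.HtmPixelization(6)
_parent = _htm5.triangle(12288)      # a level-5 trixel
_child = _htm6.triangle(12288 * 4)   # its first level-6 child, inside it
assert not _parent.isDisjointFrom(_child)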

1411 def testAbstractQuery(self): 

1412 """Test that we can run a query that just lists the known 

1413 bands. This is tricky because band is 

1414 backed by a query against physical_filter. 

1415 """ 

1416 registry = self.makeRegistry() 

1417 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1418 registry.insertDimensionData( 

1419 "physical_filter", 

1420 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1421 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1422 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1423 ) 

1424 rows = registry.queryDataIds(["band"]).toSet() 

1425 self.assertCountEqual( 

1426 rows, 

1427 [ 

1428 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1429 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1430 ], 

1431 ) 

1432 
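# A plain-Python sketch (values copied from the test above) of why the band
# query is tricky: band is backed by a query against physical_filter, so
# listing the known bands amounts to deduplicating that column.
_physical_filters = [
    {"name": "dummy_i", "band": "i"},
    {"name": "dummy_i2", "band": "i"},
    {"name": "dummy_r", "band": "r"},
]
assert {rec["band"] for rec in _physical_filters} == {"i", "r"}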

1433 def testAttributeManager(self): 

1434 """Test basic functionality of attribute manager.""" 

1435 # number of attributes with schema versions in a fresh database:

1436 # 6 managers with 2 records per manager, plus the config for dimensions

1437 VERSION_COUNT = 6 * 2 + 1 

1438 

1439 registry = self.makeRegistry() 

1440 attributes = registry._managers.attributes 

1441 

1442 # check what get() returns for non-existing key 

1443 self.assertIsNone(attributes.get("attr")) 

1444 self.assertEqual(attributes.get("attr", ""), "") 

1445 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1446 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1447 

1448 # cannot store empty key or value 

1449 with self.assertRaises(ValueError): 

1450 attributes.set("", "value") 

1451 with self.assertRaises(ValueError): 

1452 attributes.set("attr", "") 

1453 

1454 # set value of non-existing key 

1455 attributes.set("attr", "value") 

1456 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1457 self.assertEqual(attributes.get("attr"), "value") 

1458 

1459 # update value of existing key 

1460 with self.assertRaises(ButlerAttributeExistsError): 

1461 attributes.set("attr", "value2") 

1462 

1463 attributes.set("attr", "value2", force=True) 

1464 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1465 self.assertEqual(attributes.get("attr"), "value2") 

1466 

1467 # delete existing key 

1468 self.assertTrue(attributes.delete("attr")) 

1469 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1470 

1471 # delete non-existing key 

1472 self.assertFalse(attributes.delete("non-attr")) 

1473 

1474 # store a bunch of keys and get the list back

1475 data = [ 

1476 ("version.core", "1.2.3"), 

1477 ("version.dimensions", "3.2.1"), 

1478 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1479 ] 

1480 for key, value in data: 

1481 attributes.set(key, value) 

1482 items = dict(attributes.items()) 

1483 for key, value in data: 

1484 self.assertEqual(items[key], value) 

1485 

1486 def testQueryDatasetsDeduplication(self): 

1487 """Test that the findFirst option to queryDatasets selects datasets 

1488 from collections in the order given.

1489 """ 

1490 registry = self.makeRegistry() 

1491 self.loadData(registry, "base.yaml") 

1492 self.loadData(registry, "datasets.yaml") 

1493 self.assertCountEqual( 

1494 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1495 [ 

1496 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1497 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1498 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1499 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1500 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1501 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1502 ], 

1503 ) 

1504 self.assertCountEqual( 

1505 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1506 [ 

1507 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1508 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1509 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1510 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1511 ], 

1512 ) 

1513 self.assertCountEqual( 

1514 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1515 [ 

1516 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1517 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1518 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1519 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1520 ], 

1521 ) 

1522 
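# A pure-Python sketch (hypothetical container names) of the find-first
# semantics tested above: for each data ID, the first collection in the
# caller's ordering that has a matching dataset wins, which is why reversing
# the collection list changes which biases come back.
def _find_first(data_ids, collections, refs_by_collection):
    """Yield at most one ref per data ID, honoring collection order."""
    for data_id in data_ids:
        for collection in collections:
            ref = refs_by_collection[collection].get(data_id)
            if ref is not None:
                yield ref
                break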

1523 def testQueryResults(self): 

1524 """Test querying for data IDs and then manipulating the QueryResults 

1525 object returned to perform other queries. 

1526 """ 

1527 registry = self.makeRegistry() 

1528 self.loadData(registry, "base.yaml") 

1529 self.loadData(registry, "datasets.yaml") 

1530 bias = registry.getDatasetType("bias") 

1531 flat = registry.getDatasetType("flat") 

1532 # Obtain expected results from methods other than those we're testing 

1533 # here. That includes: 

1534 # - the dimensions of the data IDs we want to query: 

1535 expected_dimensions = registry.dimensions.conform(["detector", "physical_filter"]) 

1536 # - the dimensions of some other data IDs we'll extract from that: 

1537 expected_subset_dimensions = registry.dimensions.conform(["detector"]) 

1538 # - the data IDs we expect to obtain from the first queries: 

1539 expectedDataIds = DataCoordinateSet( 

1540 { 

1541 DataCoordinate.standardize( 

1542 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1543 ) 

1544 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1545 }, 

1546 dimensions=expected_dimensions, 

1547 hasFull=False, 

1548 hasRecords=False, 

1549 ) 

1550 # - the flat datasets we expect to find from those data IDs, in just 

1551 # one collection (so deduplication is irrelevant): 

1552 expectedFlats = [ 

1553 registry.findDataset( 

1554 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1555 ), 

1556 registry.findDataset( 

1557 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1558 ), 

1559 registry.findDataset( 

1560 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1561 ), 

1562 ] 

1563 # - the data IDs we expect to extract from that: 

1564 expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions) 

1565 # - the bias datasets we expect to find from those data IDs, after we

1566 # subset out the physical_filter dimension, both with duplicates:

1567 expectedAllBiases = [ 

1568 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1569 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1570 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1571 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1572 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1573 ] 

1574 # - ...and without duplicates: 

1575 expectedDeduplicatedBiases = [ 

1576 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1577 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1578 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1579 ] 

1580 # Test against those expected results, using a "lazy" query for the 

1581 # data IDs (which re-executes that query each time we use it to do 

1582 # something new). 

1583 dataIds = registry.queryDataIds( 

1584 ["detector", "physical_filter"], 

1585 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1586 instrument="Cam1", 

1587 ) 

1588 self.assertEqual(dataIds.dimensions, expected_dimensions) 

1589 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1590 self.assertCountEqual( 

1591 list( 

1592 dataIds.findDatasets( 

1593 flat, 

1594 collections=["imported_r"], 

1595 ) 

1596 ), 

1597 expectedFlats, 

1598 ) 

1599 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True) 

1600 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1601 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1602 self.assertCountEqual( 

1603 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1604 expectedAllBiases, 

1605 ) 

1606 self.assertCountEqual( 

1607 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1608 expectedDeduplicatedBiases, 

1609 ) 

1610 

1611 # Searching for a dataset with dimensions we had projected away 

1612 # restores those dimensions. 

1613 self.assertCountEqual( 

1614 list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)), 

1615 expectedFlats, 

1616 ) 

1617 

1618 # Use a named dataset type that does not exist and a dataset type 

1619 # object that does not exist. 

1620 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure") 

1621 

1622 # Test both string name and dataset type object. 

1623 test_type: str | DatasetType 

1624 for test_type, test_type_name in ( 

1625 (unknown_type, unknown_type.name), 

1626 (unknown_type.name, unknown_type.name), 

1627 ): 

1628 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name): 

1629 list( 

1630 subsetDataIds.findDatasets( 

1631 test_type, collections=["imported_r", "imported_g"], findFirst=True 

1632 ) 

1633 ) 

1634 

1635 # Materialize the bias dataset queries (only) by putting the results 

1636 # into temporary tables, then repeat those tests. 

1637 with subsetDataIds.findDatasets( 

1638 bias, collections=["imported_r", "imported_g"], findFirst=False 

1639 ).materialize() as biases: 

1640 self.assertCountEqual(list(biases), expectedAllBiases) 

1641 with subsetDataIds.findDatasets( 

1642 bias, collections=["imported_r", "imported_g"], findFirst=True 

1643 ).materialize() as biases: 

1644 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1645 # Materialize the data ID subset query, but not the dataset queries. 

1646 with subsetDataIds.materialize() as subsetDataIds: 

1647 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1648 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1649 self.assertCountEqual( 

1650 list( 

1651 subsetDataIds.findDatasets( 

1652 bias, collections=["imported_r", "imported_g"], findFirst=False 

1653 ) 

1654 ), 

1655 expectedAllBiases, 

1656 ) 

1657 self.assertCountEqual( 

1658 list( 

1659 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1660 ), 

1661 expectedDeduplicatedBiases, 

1662 ) 

1663 # Materialize the dataset queries, too. 

1664 with subsetDataIds.findDatasets( 

1665 bias, collections=["imported_r", "imported_g"], findFirst=False 

1666 ).materialize() as biases: 

1667 self.assertCountEqual(list(biases), expectedAllBiases) 

1668 with subsetDataIds.findDatasets( 

1669 bias, collections=["imported_r", "imported_g"], findFirst=True 

1670 ).materialize() as biases: 

1671 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1672 # Materialize the original query, but none of the follow-up queries. 

1673 with dataIds.materialize() as dataIds: 

1674 self.assertEqual(dataIds.dimensions, expected_dimensions) 

1675 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1676 self.assertCountEqual( 

1677 list( 

1678 dataIds.findDatasets( 

1679 flat, 

1680 collections=["imported_r"], 

1681 ) 

1682 ), 

1683 expectedFlats, 

1684 ) 

1685 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True) 

1686 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1687 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1688 self.assertCountEqual( 

1689 list( 

1690 subsetDataIds.findDatasets( 

1691 bias, collections=["imported_r", "imported_g"], findFirst=False 

1692 ) 

1693 ), 

1694 expectedAllBiases, 

1695 ) 

1696 self.assertCountEqual( 

1697 list( 

1698 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1699 ), 

1700 expectedDeduplicatedBiases, 

1701 ) 

1702 # Materialize just the bias dataset queries. 

1703 with subsetDataIds.findDatasets( 

1704 bias, collections=["imported_r", "imported_g"], findFirst=False 

1705 ).materialize() as biases: 

1706 self.assertCountEqual(list(biases), expectedAllBiases) 

1707 with subsetDataIds.findDatasets( 

1708 bias, collections=["imported_r", "imported_g"], findFirst=True 

1709 ).materialize() as biases: 

1710 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1711 # Materialize the subset data ID query, but not the dataset 

1712 # queries. 

1713 with subsetDataIds.materialize() as subsetDataIds: 

1714 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1715 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1716 self.assertCountEqual( 

1717 list( 

1718 subsetDataIds.findDatasets( 

1719 bias, collections=["imported_r", "imported_g"], findFirst=False 

1720 ) 

1721 ), 

1722 expectedAllBiases, 

1723 ) 

1724 self.assertCountEqual( 

1725 list( 

1726 subsetDataIds.findDatasets( 

1727 bias, collections=["imported_r", "imported_g"], findFirst=True 

1728 ) 

1729 ), 

1730 expectedDeduplicatedBiases, 

1731 ) 

1732 # Materialize the bias dataset queries, too, so now we're 

1733 # materializing every single step. 

1734 with subsetDataIds.findDatasets( 

1735 bias, collections=["imported_r", "imported_g"], findFirst=False 

1736 ).materialize() as biases: 

1737 self.assertCountEqual(list(biases), expectedAllBiases) 

1738 with subsetDataIds.findDatasets( 

1739 bias, collections=["imported_r", "imported_g"], findFirst=True 

1740 ).materialize() as biases: 

1741 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1742 
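# A sketch (assuming only the context-manager behavior exercised above) of
# the materialize() pattern: entering the context writes the query results
# to a temporary table, follow-up queries inside the block read from that
# table instead of re-executing the original query, and exiting drops it.
def _materialize_pattern_sketch(registry):
    with registry.queryDataIds(["detector"]).materialize() as data_ids:
        return set(data_ids.findDatasets("bias", collections=["imported_g"], findFirst=False))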

1743 def testStorageClassPropagation(self): 

1744 """Test that queries for datasets respect the storage class passed in 

1745 as part of a full dataset type. 

1746 """ 

1747 registry = self.makeRegistry() 

1748 self.loadData(registry, "base.yaml") 

1749 dataset_type_in_registry = DatasetType( 

1750 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions 

1751 ) 

1752 registry.registerDatasetType(dataset_type_in_registry) 

1753 run = "run1" 

1754 registry.registerRun(run) 

1755 (inserted_ref,) = registry.insertDatasets( 

1756 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run 

1757 ) 

1758 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry) 

1759 query_dataset_type = DatasetType( 

1760 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions 

1761 ) 

1762 self.assertNotEqual(dataset_type_in_registry, query_dataset_type) 

1763 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run]) 

1764 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore 

1765 (query_datasets_ref,) = query_datasets_result 

1766 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type) 

1767 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets( 

1768 query_dataset_type, collections=[run] 

1769 ) 

1770 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type) 

1771 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result 

1772 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type) 

1773 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type) 

1774 self.assertEqual(list(query_dataset_types_result), [query_dataset_type]) 

1775 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run]) 

1776 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type) 

1777 

1778 def testEmptyDimensionsQueries(self): 

1779 """Test Query and QueryResults objects in the case where there are no 

1780 dimensions. 

1781 """ 

1782 # Set up test data: one dataset type, two runs, one dataset in each. 

1783 registry = self.makeRegistry() 

1784 self.loadData(registry, "base.yaml") 

1785 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1786 registry.registerDatasetType(schema) 

1787 dataId = DataCoordinate.make_empty(registry.dimensions) 

1788 run1 = "run1" 

1789 run2 = "run2" 

1790 registry.registerRun(run1) 

1791 registry.registerRun(run2) 

1792 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1793 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1794 # Query directly for both of the datasets together, then for each one at a time.

1795 self.checkQueryResults( 

1796 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1797 ) 

1798 self.checkQueryResults( 

1799 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1800 [dataset1], 

1801 ) 

1802 self.checkQueryResults( 

1803 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1804 [dataset2], 

1805 ) 

1806 # Query for data IDs with no dimensions. 

1807 dataIds = registry.queryDataIds([]) 

1808 self.checkQueryResults(dataIds, [dataId]) 

1809 # Use queried data IDs to find the datasets. 

1810 self.checkQueryResults( 

1811 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1812 [dataset1, dataset2], 

1813 ) 

1814 self.checkQueryResults( 

1815 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1816 [dataset1], 

1817 ) 

1818 self.checkQueryResults( 

1819 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1820 [dataset2], 

1821 ) 

1822 # Now materialize the data ID query results and repeat those tests. 

1823 with dataIds.materialize() as dataIds: 

1824 self.checkQueryResults(dataIds, [dataId]) 

1825 self.checkQueryResults( 

1826 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1827 [dataset1], 

1828 ) 

1829 self.checkQueryResults( 

1830 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1831 [dataset2], 

1832 ) 

1833 # Query for non-empty data IDs, then subset that to get the empty one. 

1834 # Repeat the above tests starting from that. 

1835 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1836 self.checkQueryResults(dataIds, [dataId]) 

1837 self.checkQueryResults( 

1838 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1839 [dataset1, dataset2], 

1840 ) 

1841 self.checkQueryResults( 

1842 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1843 [dataset1], 

1844 ) 

1845 self.checkQueryResults( 

1846 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1847 [dataset2], 

1848 ) 

1849 with dataIds.materialize() as dataIds: 

1850 self.checkQueryResults(dataIds, [dataId]) 

1851 self.checkQueryResults( 

1852 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1853 [dataset1, dataset2], 

1854 ) 

1855 self.checkQueryResults( 

1856 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1857 [dataset1], 

1858 ) 

1859 self.checkQueryResults( 

1860 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1861 [dataset2], 

1862 ) 

1863 # Query for non-empty data IDs, then materialize, then subset to get 

1864 # the empty one. Repeat again. 

1865 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1866 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1867 self.checkQueryResults(dataIds, [dataId]) 

1868 self.checkQueryResults( 

1869 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1870 [dataset1, dataset2], 

1871 ) 

1872 self.checkQueryResults( 

1873 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1874 [dataset1], 

1875 ) 

1876 self.checkQueryResults( 

1877 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1878 [dataset2], 

1879 ) 

1880 with dataIds.materialize() as dataIds: 

1881 self.checkQueryResults(dataIds, [dataId]) 

1882 self.checkQueryResults( 

1883 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1884 [dataset1, dataset2], 

1885 ) 

1886 self.checkQueryResults( 

1887 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1888 [dataset1], 

1889 ) 

1890 self.checkQueryResults( 

1891 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1892 [dataset2], 

1893 ) 

1894 # Repeat the materialization tests with a dimension element that isn't 

1895 # cached, so there's no way we can know when building the query whether

1896 # there are any rows or not (there aren't).

1897 dataIds = registry.queryDataIds(["exposure"]).subset(registry.dimensions.empty, unique=True) 

1898 with dataIds.materialize() as dataIds: 

1899 self.checkQueryResults(dataIds, []) 

1900 self.checkQueryResults( 

1901 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), [] 

1902 ) 

1903 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), []) 

1904 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), []) 

1905 # Query for non-empty data IDs with a constraint on an empty-data-ID 

1906 # dataset that exists. 

1907 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...) 

1908 self.checkQueryResults( 

1909 dataIds.subset(unique=True), 

1910 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)], 

1911 ) 

1912 # Again query for non-empty data IDs with a constraint on empty-data-ID 

1913 # datasets, but when the datasets don't exist. We delete the existing 

1914 # dataset and query just that collection rather than creating a new 

1915 # empty collection because this is a bit less likely for our build-time 

1916 # logic to shortcut out (via the collection summaries), and such a

1917 # shortcut would make this test a bit more trivial than we'd like. 

1918 registry.removeDatasets([dataset2]) 

1919 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2) 

1920 self.checkQueryResults(dataIds, []) 

1921 

1922 def testDimensionDataModifications(self): 

1923 """Test that modifying dimension records via

1924 syncDimensionData(..., update=True) and 

1925 insertDimensionData(..., replace=True) works as expected, even in the 

1926 presence of datasets using those dimensions and spatial overlap 

1927 relationships. 

1928 """ 

1929 

1930 def _unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1931 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1932 for begin, end in ranges: 

1933 yield from range(begin, end) 

1934 

1935 def _range_set_hull( 

1936 ranges: lsst.sphgeom.RangeSet, 

1937 pixelization: lsst.sphgeom.HtmPixelization, 

1938 ) -> lsst.sphgeom.ConvexPolygon: 

1939 """Create a ConvexPolygon hull of the region defined by a set of 

1940 HTM pixelization index ranges. 

1941 """ 

1942 points = [] 

1943 for index in _unpack_range_set(ranges): 

1944 points.extend(pixelization.triangle(index).getVertices()) 

1945 return lsst.sphgeom.ConvexPolygon(points) 

1946 

1947 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1948 # and four child regions (the trixels within the parent at the next 

1949 # level). We'll use the parent as a tract/visit region and the children

1950 # as its patch/visit_detector regions. 

1951 registry = self.makeRegistry() 

1952 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1953 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1954 index = 12288 
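# Each HTM trixel with index i has four children at the next level with
# indices 4*i through 4*i + 3, so scaling this single-index range by 4
# turns the one trixel above into exactly its four children.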

1955 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 

1956 assert htm6.universe().contains(child_ranges_small) 

1957 child_regions_small = [htm6.triangle(i) for i in _unpack_range_set(child_ranges_small)] 

1958 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1959 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1960 ) 

1961 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1962 # Make a larger version of each child region, defined as the convex hull

1963 # of the htm6 trixels that overlap the original's bounding circle. Make

1964 # a new parent that's the convex hull of the new children.

1965 child_regions_large = [ 

1966 _range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1967 ] 

1968 assert all( 

1969 large.contains(small) 

1970 for large, small in zip(child_regions_large, child_regions_small, strict=True) 

1971 ) 

1972 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1973 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1974 ) 

1975 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1976 assert parent_region_large.contains(parent_region_small) 

1977 assert not parent_region_small.contains(parent_region_large) 

1978 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1979 # Find some commonSkyPix indices that overlap the large regions but do

1980 # not overlap the small regions. We use commonSkyPix here to make sure

1981 # the real tests later involve what's in the database, not just

1982 # post-query filtering of regions.

1983 child_difference_indices = [] 

1984 for large, small in zip(child_regions_large, child_regions_small, strict=True): 

1985 difference = list(_unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1986 assert difference, "if this is empty, we can't test anything useful with these regions" 

1987 assert all( 

1988 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1989 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1990 for d in difference 

1991 ) 

1992 child_difference_indices.append(difference) 

1993 parent_difference_indices = list( 

1994 _unpack_range_set( 

1995 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1996 ) 

1997 ) 

1998 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1999 assert all( 

2000 ( 

2001 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

2002 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

2003 ) 

2004 for d in parent_difference_indices 

2005 ) 

2006 # Now that we've finally got those regions, we'll insert the large ones 

2007 # as tract/patch dimension records. 

2008 skymap_name = "testing_v1" 

2009 registry.insertDimensionData( 

2010 "skymap", 

2011 { 

2012 "name": skymap_name, 

2013 "hash": bytes([42]), 

2014 "tract_max": 1, 

2015 "patch_nx_max": 2, 

2016 "patch_ny_max": 2, 

2017 }, 

2018 ) 

2019 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

2020 registry.insertDimensionData( 

2021 "patch", 

2022 *[ 

2023 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

2024 for n, c in enumerate(child_regions_large) 

2025 ], 

2026 ) 

2027 # Add a dataset that uses these dimensions to make sure that modifying

2028 # them doesn't disrupt foreign keys (we need to make sure the DB doesn't

2029 # implement insert with replace=True as delete-then-insert).

2030 dataset_type = DatasetType( 

2031 "coadd", 

2032 dimensions=["tract", "patch"], 

2033 universe=registry.dimensions, 

2034 storageClass="Exposure", 

2035 ) 

2036 registry.registerDatasetType(dataset_type) 

2037 registry.registerCollection("the_run", CollectionType.RUN) 

2038 registry.insertDatasets( 

2039 dataset_type, 

2040 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

2041 run="the_run", 

2042 ) 

2043 # Query for tracts and patches that overlap some "difference"

2044 # commonSkyPix pixels; there should be overlaps, because the database

2045 # has the "large" suite of regions.

2046 self.assertEqual( 

2047 {0}, 

2048 { 

2049 data_id["tract"] 

2050 for data_id in registry.queryDataIds( 

2051 ["tract"], 

2052 skymap=skymap_name, 

2053 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

2054 ) 

2055 }, 

2056 ) 

2057 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

2058 self.assertIn( 

2059 patch_id, 

2060 { 

2061 data_id["patch"] 

2062 for data_id in registry.queryDataIds( 

2063 ["patch"], 

2064 skymap=skymap_name, 

2065 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

2066 ) 

2067 }, 

2068 ) 

2069 # Use sync to update the tract region and insert to update the regions 

2070 # of the patches, to the "small" suite. 

2071 updated = registry.syncDimensionData( 

2072 "tract", 

2073 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

2074 update=True, 

2075 ) 

2076 self.assertEqual(updated, {"region": parent_region_large}) 
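# Note the return-value contract relied on here: when update=True actually
# changes an existing record, syncDimensionData returns a dict mapping each
# updated field to its previous value (hence the old "large" region above).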

2077 registry.insertDimensionData( 

2078 "patch", 

2079 *[ 

2080 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

2081 for n, c in enumerate(child_regions_small) 

2082 ], 

2083 replace=True, 

2084 ) 

2085 # Query again; there now should be no such overlaps, because the 

2086 # database has the "small" suite of regions. 

2087 self.assertFalse( 

2088 set( 

2089 registry.queryDataIds( 

2090 ["tract"], 

2091 skymap=skymap_name, 

2092 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

2093 ) 

2094 ) 

2095 ) 

2096 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

2097 self.assertNotIn( 

2098 patch_id, 

2099 { 

2100 data_id["patch"] 

2101 for data_id in registry.queryDataIds( 

2102 ["patch"], 

2103 skymap=skymap_name, 

2104 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

2105 ) 

2106 }, 

2107 ) 

2108 # Update back to the large regions and query one more time. 

2109 updated = registry.syncDimensionData( 

2110 "tract", 

2111 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

2112 update=True, 

2113 ) 

2114 self.assertEqual(updated, {"region": parent_region_small}) 

2115 registry.insertDimensionData( 

2116 "patch", 

2117 *[ 

2118 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

2119 for n, c in enumerate(child_regions_large) 

2120 ], 

2121 replace=True, 

2122 ) 

2123 self.assertEqual( 

2124 {0}, 

2125 { 

2126 data_id["tract"] 

2127 for data_id in registry.queryDataIds( 

2128 ["tract"], 

2129 skymap=skymap_name, 

2130 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

2131 ) 

2132 }, 

2133 ) 

2134 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

2135 self.assertIn( 

2136 patch_id, 

2137 { 

2138 data_id["patch"] 

2139 for data_id in registry.queryDataIds( 

2140 ["patch"], 

2141 skymap=skymap_name, 

2142 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

2143 ) 

2144 }, 

2145 ) 

2146 

2147 def testCalibrationCollections(self): 

2148 """Test operations on `~CollectionType.CALIBRATION` collections, 

2149 including `SqlRegistry.certify`, `SqlRegistry.decertify`, 

2150 `SqlRegistry.findDataset`, and 

2151 `DataCoordinateQueryResults.findRelatedDatasets`. 

2152 """ 

2153 # Setup - make a Registry, fill it with some datasets in 

2154 # non-calibration collections. 

2155 registry = self.makeRegistry() 

2156 self.loadData(registry, "base.yaml") 

2157 self.loadData(registry, "datasets.yaml") 

2158 # Set up some timestamps. 

2159 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

2160 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

2161 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

2162 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

2163 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

2164 allTimespans = [ 

2165 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

2166 ] 

2167 # Insert some exposure records with timespans between each sequential 

2168 # pair of those. 

2169 registry.insertDimensionData( 

2170 "day_obs", {"instrument": "Cam1", "id": 20200101, "timespan": Timespan(t1, t5)} 

2171 ) 

2172 registry.insertDimensionData( 

2173 "group", 

2174 {"instrument": "Cam1", "name": "group0"}, 

2175 {"instrument": "Cam1", "name": "group1"}, 

2176 {"instrument": "Cam1", "name": "group2"}, 

2177 {"instrument": "Cam1", "name": "group3"}, 

2178 ) 

2179 registry.insertDimensionData( 

2180 "exposure", 

2181 { 

2182 "instrument": "Cam1", 

2183 "id": 0, 

2184 "group": "group0", 

2185 "obs_id": "zero", 

2186 "physical_filter": "Cam1-G", 

2187 "day_obs": 20200101, 

2188 "timespan": Timespan(t1, t2), 

2189 }, 

2190 { 

2191 "instrument": "Cam1", 

2192 "id": 1, 

2193 "group": "group1", 

2194 "obs_id": "one", 

2195 "physical_filter": "Cam1-G", 

2196 "day_obs": 20200101, 

2197 "timespan": Timespan(t2, t3), 

2198 }, 

2199 { 

2200 "instrument": "Cam1", 

2201 "id": 2, 

2202 "group": "group2", 

2203 "obs_id": "two", 

2204 "physical_filter": "Cam1-G", 

2205 "day_obs": 20200101, 

2206 "timespan": Timespan(t3, t4), 

2207 }, 

2208 { 

2209 "instrument": "Cam1", 

2210 "id": 3, 

2211 "group": "group3", 

2212 "obs_id": "three", 

2213 "physical_filter": "Cam1-G", 

2214 "day_obs": 20200101, 

2215 "timespan": Timespan(t4, t5), 

2216 }, 

2217 ) 

2218 # Get references to some datasets. 

2219 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

2220 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

2221 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

2222 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

2223 # Register the main calibration collection we'll be working with. 

2224 collection = "Cam1/calibs/default" 

2225 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

2226 # Cannot associate into a calibration collection (no timespan). 

2227 with self.assertRaises(CollectionTypeError): 

2228 registry.associate(collection, [bias2a]) 

2229 # Certify 2a dataset with [t2, t4) validity. 

2230 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

2231 # Test that we can query for this dataset via the new collection, both 

2232 # on its own and with a RUN collection. 

2233 self.assertEqual( 

2234 set(registry.queryDatasets("bias", findFirst=False, collections=collection)), 

2235 {bias2a}, 

2236 ) 

2237 self.assertEqual( 

2238 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])), 

2239 { 

2240 bias2a, 

2241 bias2b, 

2242 bias3b, 

2243 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2244 }, 

2245 ) 

2246 self.assertEqual( 

2247 set(registry.queryDataIds("detector", datasets="bias", collections=collection)), 

2248 {registry.expandDataId(instrument="Cam1", detector=2)}, 

2249 ) 

2250 self.assertEqual( 

2251 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])), 

2252 { 

2253 registry.expandDataId(instrument="Cam1", detector=2), 

2254 registry.expandDataId(instrument="Cam1", detector=3), 

2255 registry.expandDataId(instrument="Cam1", detector=4), 

2256 }, 

2257 ) 

2258 self.assertEqual( 

2259 set( 

2260 registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets( 

2261 "bias", findFirst=True, collections=[collection] 

2262 ) 

2263 ), 

2264 { 

2265 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a), 

2266 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a), 

2267 }, 

2268 ) 

2269 self.assertEqual( 

2270 set( 

2271 registry.queryDataIds( 

2272 ["exposure", "detector"], instrument="Cam1", detector=2 

2273 ).findRelatedDatasets("bias", findFirst=True, collections=[collection, "imported_r"]) 

2274 ), 

2275 { 

2276 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a), 

2277 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a), 

2278 (registry.expandDataId(instrument="Cam1", detector=2, exposure=0), bias2b), 

2279 (registry.expandDataId(instrument="Cam1", detector=2, exposure=3), bias2b), 

2280 }, 

2281 ) 

2282 

2283 # We should not be able to certify 2b with anything overlapping that 

2284 # window. 

2285 with self.assertRaises(ConflictingDefinitionError): 

2286 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

2287 with self.assertRaises(ConflictingDefinitionError): 

2288 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

2289 with self.assertRaises(ConflictingDefinitionError): 

2290 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

2291 with self.assertRaises(ConflictingDefinitionError): 

2292 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

2293 with self.assertRaises(ConflictingDefinitionError): 

2294 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

2295 with self.assertRaises(ConflictingDefinitionError): 

2296 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

2297 with self.assertRaises(ConflictingDefinitionError): 

2298 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

2299 with self.assertRaises(ConflictingDefinitionError): 

2300 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

2301 # We should be able to certify 3a with a range overlapping that window, 

2302 # because it's for a different detector. 

2303 # We'll certify 3a over [t1, t3). 

2304 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

2305 # Now we'll certify 2b and 3b together over [t4, ∞). 

2306 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

2307 

2308 # Fetch all associations and check that they are what we expect. 

2309 self.assertCountEqual( 

2310 list( 

2311 registry.queryDatasetAssociations( 

2312 "bias", 

2313 collections=[collection, "imported_g", "imported_r"], 

2314 ) 

2315 ), 

2316 [ 

2317 DatasetAssociation( 

2318 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

2319 collection="imported_g", 

2320 timespan=None, 

2321 ), 

2322 DatasetAssociation( 

2323 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2324 collection="imported_r", 

2325 timespan=None, 

2326 ), 

2327 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

2328 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

2329 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

2330 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

2331 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

2332 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

2333 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2334 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2335 ], 

2336 ) 

2337 

2338 class Ambiguous: 

2339 """Tag class to denote lookups that should be ambiguous.""" 

2340 

2341 pass 

2342 

2343 def _assertLookup( 

2344 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None 

2345 ) -> None: 

2346 """Local function that asserts that a bias lookup returns the given 

2347 expected result. 

2348 """ 

2349 if expected is Ambiguous: 

2350 with self.assertRaises((DatasetTypeError, LookupError)): 

2351 registry.findDataset( 

2352 "bias", 

2353 collections=collection, 

2354 instrument="Cam1", 

2355 detector=detector, 

2356 timespan=timespan, 

2357 ) 

2358 else: 

2359 self.assertEqual( 

2360 expected, 

2361 registry.findDataset( 

2362 "bias", 

2363 collections=collection, 

2364 instrument="Cam1", 

2365 detector=detector, 

2366 timespan=timespan, 

2367 ), 

2368 ) 

2369 

2370 # Systematically test lookups against expected results. 

2371 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2372 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2373 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2374 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2375 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

2376 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2377 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2378 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2379 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2380 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

2381 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2382 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2383 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2384 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

2385 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2386 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

2387 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

2388 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

2389 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

2390 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2391 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2392 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2393 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2394 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2395 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2396 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

2397 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2398 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2399 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2400 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2401 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

2402 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2403 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2404 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2405 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2406 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2407 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2408 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2409 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2410 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2411 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2412 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2413 

2414 # Test lookups via temporal joins to exposures. 

2415 self.assertEqual( 

2416 set( 

2417 registry.queryDataIds( 

2418 ["exposure", "detector"], instrument="Cam1", detector=2 

2419 ).findRelatedDatasets("bias", collections=[collection]) 

2420 ), 

2421 { 

2422 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a), 

2423 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a), 

2424 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b), 

2425 }, 

2426 ) 

2427 self.assertEqual( 

2428 set( 

2429 registry.queryDataIds( 

2430 ["exposure", "detector"], instrument="Cam1", detector=3 

2431 ).findRelatedDatasets("bias", collections=[collection]) 

2432 ), 

2433 { 

2434 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a), 

2435 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a), 

2436 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b), 

2437 }, 

2438 ) 

2439 self.assertEqual( 

2440 set( 

2441 registry.queryDataIds( 

2442 ["exposure", "detector"], instrument="Cam1", detector=2 

2443 ).findRelatedDatasets("bias", collections=[collection, "imported_g"]) 

2444 ), 

2445 { 

2446 (registry.expandDataId(instrument="Cam1", exposure=0, detector=2), bias2a), 

2447 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a), 

2448 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a), 

2449 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b), 

2450 }, 

2451 ) 

2452 self.assertEqual( 

2453 set( 

2454 registry.queryDataIds( 

2455 ["exposure", "detector"], instrument="Cam1", detector=3 

2456 ).findRelatedDatasets("bias", collections=[collection, "imported_g"]) 

2457 ), 

2458 { 

2459 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a), 

2460 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a), 

2461 (registry.expandDataId(instrument="Cam1", exposure=2, detector=3), bias3a), 

2462 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b), 

2463 }, 

2464 ) 

2465 

2466 # Decertify [t3, t5) for all data IDs, and do test lookups again. 

2467 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2468 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2469 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2470 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2471 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2472 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2473 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2474 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2475 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2476 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2477 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2478 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2479 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2480 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2481 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2482 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2483 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2484 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2485 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2486 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2487 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2488 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2489 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2490 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2491 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2492 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2493 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2494 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2495 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2496 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2497 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2498 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2499 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2500 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2501 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2502 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2503 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2504 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2505 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2506 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2507 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2508 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2509 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2510 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2511 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2512 

2513 # Decertify everything, this time with explicit data IDs, then check 

2514 # that no lookups succeed. 

2515 registry.decertify( 

2516 collection, 

2517 "bias", 

2518 Timespan(None, None), 

2519 dataIds=[ 

2520 dict(instrument="Cam1", detector=2), 

2521 dict(instrument="Cam1", detector=3), 

2522 ], 

2523 ) 

2524 for detector in (2, 3): 

2525 for timespan in allTimespans: 

2526 _assertLookup(detector=detector, timespan=timespan, expected=None) 

2527 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2528 # those. 

2529 registry.certify( 

2530 collection, 

2531 [bias2a, bias3a], 

2532 Timespan(None, None), 

2533 ) 

2534 for timespan in allTimespans: 

2535 _assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2536 _assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2537 # Decertify just bias2 over [t2, t4). 

2538 # This should split a single certification row into two (and leave the 

2539 # other existing row, for bias3a, alone). 

2540 registry.decertify( 

2541 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2542 ) 

2543 for timespan in allTimespans: 

2544 _assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2545 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2546 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2547 if overlapsBefore and overlapsAfter: 

2548 expected = Ambiguous 

2549 elif overlapsBefore or overlapsAfter: 

2550 expected = bias2a 

2551 else: 

2552 expected = None 

2553 _assertLookup(detector=2, timespan=timespan, expected=expected) 

2554 
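    # The branching above encodes the general rule for find-first calibration
    # lookups once a window has been decertified: a lookup timespan touching
    # the surviving certified intervals on both sides of the hole is
    # ambiguous, one touching exactly one side finds the dataset, and one
    # confined to the hole finds nothing.  A minimal sketch of that rule as a
    # hypothetical helper (not part of the original suite; ``Ambiguous`` is
    # the sentinel used by ``_assertLookup`` above):
    @staticmethod
    def _sketch_expected_lookup(lookup: Timespan, hole: Timespan, dataset: DatasetRef):
        # Does the lookup touch the certified coverage on either side?
        overlaps_before = lookup.overlaps(Timespan(None, hole.begin))
        overlaps_after = lookup.overlaps(Timespan(hole.end, None))
        if overlaps_before and overlaps_after:
            return Ambiguous
        return dataset if (overlaps_before or overlaps_after) else None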

2555 def testSkipCalibs(self): 

2556 """Test how queries handle skipping of calibration collections.""" 

2557 registry = self.makeRegistry() 

2558 self.loadData(registry, "base.yaml") 

2559 self.loadData(registry, "datasets.yaml") 

2560 

2561 coll_calib = "Cam1/calibs/default" 

2562 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2563 

2564 # Add all biases to the calibration collection. 

2565 # Without this, the logic that prunes dataset subqueries based on 

2566 # datasetType-collection summary information will fire before the logic 

2567 # we want to test below. This is a good thing (it avoids the dreaded 

2568 # NotImplementedError a bit more often) everywhere but here. 

2569 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2570 

2571 coll_list = [coll_calib, "imported_g", "imported_r"] 

2572 chain = "Cam1/chain" 

2573 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2574 registry.setCollectionChain(chain, coll_list) 

2575 

2576 # explicit list will raise if findFirst=True or there are temporal 

2577 # dimensions 

2578 with self.assertRaises(NotImplementedError): 

2579 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2580 with self.assertRaises(NotImplementedError): 

2581 registry.queryDataIds( 

2582 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2583 ).count() 

2584 

2585 # chain will skip 

2586 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2587 self.assertGreater(len(datasets), 0) 

2588 

2589 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2590 self.assertGreater(len(dataIds), 0) 

2591 

2592 # glob will skip too 

2593 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2594 self.assertGreater(len(datasets), 0) 

2595 

2596 # regular expression will skip too 

2597 pattern = re.compile(".*") 

2598 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2599 self.assertGreater(len(datasets), 0) 

2600 

2601 # ellipsis should work as usual 

2602 datasets = list(registry.queryDatasets("bias", collections=...)) 

2603 self.assertGreater(len(datasets), 0) 

2604 

2605 # few tests with findFirst 

2606 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2607 self.assertGreater(len(datasets), 0) 

2608 
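    # A hedged sketch (illustrative only, not part of the original suite) of
    # how a caller might pre-filter CALIBRATION collections to sidestep the
    # NotImplementedError exercised above when a find-first search is needed;
    # note that temporal dimensions in the query can still trigger it.
    def _sketch_query_skipping_calibs(self, registry, dataset_type, collections):
        # Keep only collections whose type supports find-first searches.
        searchable = [
            name
            for name in collections
            if registry.getCollectionType(name) is not CollectionType.CALIBRATION
        ]
        return list(registry.queryDatasets(dataset_type, collections=searchable, findFirst=True))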

2609 def testIngestTimeQuery(self): 

2610 registry = self.makeRegistry() 

2611 self.loadData(registry, "base.yaml") 

2612 dt0 = datetime.datetime.now(datetime.UTC) 

2613 self.loadData(registry, "datasets.yaml") 

2614 dt1 = datetime.datetime.now(datetime.UTC) 

2615 

2616 datasets = list(registry.queryDatasets(..., collections=...)) 

2617 len0 = len(datasets) 

2618 self.assertGreater(len0, 0) 

2619 

2620 where = "ingest_date > T'2000-01-01'" 

2621 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2622 len1 = len(datasets) 

2623 self.assertEqual(len0, len1) 

2624 

2625 # A date far enough in the future that nothing should match (no one will be using this software in 2050). 

2626 where = "ingest_date > T'2050-01-01'" 

2627 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2628 len2 = len(datasets) 

2629 self.assertEqual(len2, 0) 

2630 

2631 # Check more exact timing to make sure there is no 37-second (TAI-UTC) 

2632 # offset (after fixing DM-30124). SQLite time precision is 1 second, so 

2633 # make sure that we don't test with higher precision. 

2634 tests = [ 

2635 # format: (timestamp, operator, expected_len) 

2636 (dt0 - timedelta(seconds=1), ">", len0), 

2637 (dt0 - timedelta(seconds=1), "<", 0), 

2638 (dt1 + timedelta(seconds=1), "<", len0), 

2639 (dt1 + timedelta(seconds=1), ">", 0), 

2640 ] 

2641 for dt, op, expect_len in tests: 

2642 dt_str = dt.isoformat(sep=" ") 

2643 

2644 where = f"ingest_date {op} T'{dt_str}'" 

2645 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2646 self.assertEqual(len(datasets), expect_len) 

2647 

2648 # same with bind using datetime or astropy Time 

2649 where = f"ingest_date {op} ingest_time" 

2650 datasets = list( 

2651 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2652 ) 

2653 self.assertEqual(len(datasets), expect_len) 

2654 

2655 dt_astropy = astropy.time.Time(dt, format="datetime") 

2656 datasets = list( 

2657 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2658 ) 

2659 self.assertEqual(len(datasets), expect_len) 

2660 
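    # The T'...' literal and the ``bind`` mapping above are interchangeable
    # ways to inject a time into the expression language.  A minimal sketch
    # (assuming a registry populated as in the test above):
    def _sketch_ingest_date_forms(self, registry):
        cutoff = datetime.datetime(2020, 1, 1, tzinfo=datetime.UTC)
        # Inline time literal...
        literal = set(
            registry.queryDatasets(..., collections=..., where="ingest_date > T'2020-01-01'")
        )
        # ...and the equivalent bound-parameter form.
        bound = set(
            registry.queryDatasets(
                ..., collections=..., where="ingest_date > cutoff", bind={"cutoff": cutoff}
            )
        )
        assert literal == bound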

2661 def testTimespanQueries(self): 

2662 """Test query expressions involving timespans.""" 

2663 registry = self.makeRegistry() 

2664 self.loadData(registry, "hsc-rc2-subset.yaml") 

2665 # All visits in the database; mapping from ID to timespan. 

2666 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2667 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2668 # visit IDs are monotonically increasing). 

2669 ids = sorted(visits.keys()) 

2670 self.assertGreater(len(ids), 20) 

2671 # Pick some quasi-random indexes into `ids` to play with. 

2672 i1 = int(len(ids) * 0.1) 

2673 i2 = int(len(ids) * 0.3) 

2674 i3 = int(len(ids) * 0.6) 

2675 i4 = int(len(ids) * 0.8) 

2676 # Extract some times from those: just before the beginning of i1 (which 

2677 # should be after the end of the preceding visit), exactly the 

2678 # beginning of i2, just after the beginning of i3 (and before its end), 

2679 # and the exact end of i4. 

2680 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2681 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2682 t2 = visits[ids[i2]].begin 

2683 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2684 self.assertLess(t3, visits[ids[i3]].end) 

2685 t4 = visits[ids[i4]].end 

2686 # Make sure those are actually in order. 

2687 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2688 

2689 bind = { 

2690 "t1": t1, 

2691 "t2": t2, 

2692 "t3": t3, 

2693 "t4": t4, 

2694 "ts23": Timespan(t2, t3), 

2695 } 

2696 

2697 def query(where): 

2698 """Return results as a sorted, deduplicated list of visit IDs. 

2699 

2700 Parameters 

2701 ---------- 

2702 where : `str` 

2703 The WHERE clause for the query. 

2704 """ 

2705 return sorted( 

2706 { 

2707 dataId["visit"] 

2708 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2709 } 

2710 ) 

2711 

2712 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2713 # where they appear in the expression, and how we get the timespan into 

2714 # the expression. 

2715 

2716 # t1 is before the start of i1, so this should not include i1. 

2717 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2718 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2719 # should not include i2. 

2720 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2721 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2722 # t3 is in the middle of i3, so this should include i3. 

2723 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2724 # This one should not include i3 by the same reasoning. 

2725 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2726 # t4 is exactly at the end of i4, so this should include i4. 

2727 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2728 # i4's upper bound of t4 is exclusive, so this should not include i4. 

2729 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2730 

2731 # Now some timespan vs. time scalar queries. 

2732 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2733 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2734 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2735 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2736 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2737 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2738 

2739 # Empty timespans should not overlap anything. 

2740 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 

2741 
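    # All of the boundary cases above follow from Timespan's half-open
    # [begin, end) semantics: a timespan contains its begin but not its end,
    # so two spans that merely share an endpoint do not overlap.  A tiny
    # sketch of that rule (t1 < t2 < t3 assumed):
    @staticmethod
    def _sketch_half_open_overlap(t1, t2, t3) -> None:
        # Sharing only the endpoint t2 is not an overlap...
        assert not Timespan(t1, t2).overlaps(Timespan(t2, t3))
        # ...but any shared interior is.
        assert Timespan(t1, t3).overlaps(Timespan(t2, t3))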

2742 def testCollectionSummaries(self): 

2743 """Test recording and retrieval of collection summaries.""" 

2744 self.maxDiff = None 

2745 registry = self.makeRegistry() 

2746 # Importing datasets from yaml should go through the code path where 

2747 # we update collection summaries as we insert datasets. 

2748 self.loadData(registry, "base.yaml") 

2749 self.loadData(registry, "datasets.yaml") 

2750 flat = registry.getDatasetType("flat") 

2751 expected1 = CollectionSummary() 

2752 expected1.dataset_types.add(registry.getDatasetType("bias")) 

2753 expected1.add_data_ids( 

2754 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)] 

2755 ) 

2756 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2757 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2758 # Create a chained collection with both of the imported runs; the 

2759 # summary should be the same, because it's a union with itself. 

2760 chain = "chain" 

2761 registry.registerCollection(chain, CollectionType.CHAINED) 

2762 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2763 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2764 # Associate flats only into a tagged collection and a calibration 

2765 # collection to check summaries of those. 

2766 tag = "tag" 

2767 registry.registerCollection(tag, CollectionType.TAGGED) 

2768 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2769 calibs = "calibs" 

2770 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2771 registry.certify( 

2772 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2773 ) 

2774 expected2 = expected1.copy() 

2775 expected2.dataset_types.discard("bias") 

2776 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2777 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2778 # Explicitly calling SqlRegistry.refresh() should load those same 

2779 # summaries, via a totally different code path. 

2780 registry.refresh() 

2781 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2782 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2783 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2784 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2785 

2786 def testBindInQueryDatasets(self): 

2787 """Test that the bind parameter is correctly forwarded in 

2788 queryDatasets recursion. 

2789 """ 

2790 registry = self.makeRegistry() 

2791 # Load the standard test data so there are datasets (and collections) 

2792 # for the bind-parameter queries below. 

2793 self.loadData(registry, "base.yaml") 

2794 self.loadData(registry, "datasets.yaml") 

2795 self.assertEqual( 

2796 set(registry.queryDatasets("flat", band="r", collections=...)), 

2797 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2798 ) 

2799 

2800 def testQueryIntRangeExpressions(self): 

2801 """Test integer range expressions in ``where`` arguments. 

2802 

2803 Note that our expressions use inclusive stop values, unlike Python's. 

2804 """ 

2805 registry = self.makeRegistry() 

2806 self.loadData(registry, "base.yaml") 

2807 self.assertEqual( 

2808 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")), 

2809 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]}, 

2810 ) 

2811 self.assertEqual( 

2812 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")), 

2813 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]}, 

2814 ) 

2815 self.assertEqual( 

2816 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")), 

2817 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]}, 

2818 ) 

2819 
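    # The (start..stop:stride) literal uses an inclusive stop, so it maps to
    # Python's range with ``stop + 1``.  A sketch of the correspondence
    # behind the assertions above:
    @staticmethod
    def _sketch_range_literal(start: int, stop: int, stride: int = 1) -> list[int]:
        # (1..2) -> [1, 2]; (1..4:2) -> [1, 3]; (2..4:2) -> [2, 4].
        return list(range(start, stop + 1, stride))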

2820 def testQueryResultSummaries(self): 

2821 """Test summary methods like `count`, `any`, and `explain_no_results` 

2822 on `DataCoordinateQueryResults` and `DatasetQueryResults`. 

2823 """ 

2824 registry = self.makeRegistry() 

2825 self.loadData(registry, "base.yaml") 

2826 self.loadData(registry, "datasets.yaml") 

2827 self.loadData(registry, "spatial.yaml") 

2828 # Default test dataset has two collections, each with both flats and 

2829 # biases. Add a new collection with only biases. 

2830 registry.registerCollection("biases", CollectionType.TAGGED) 

2831 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2832 # First query yields two results, and involves no postprocessing. 

2833 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2834 self.assertTrue(query1.any(execute=False, exact=False)) 

2835 self.assertTrue(query1.any(execute=True, exact=False)) 

2836 self.assertTrue(query1.any(execute=True, exact=True)) 

2837 self.assertEqual(query1.count(exact=False), 2) 

2838 self.assertEqual(query1.count(exact=True), 2) 

2839 self.assertFalse(list(query1.explain_no_results())) 

2840 # Second query should yield no results, which we should see when 

2841 # we attempt to expand the data ID. 

2842 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2843 # There's no execute=False, exact=False test here because the behavior 

2844 # is not something we want to guarantee in this case (and exact=False 

2845 # says either answer is legal). 

2846 self.assertFalse(query2.any(execute=True, exact=False)) 

2847 self.assertFalse(query2.any(execute=True, exact=True)) 

2848 self.assertEqual(query2.count(exact=False), 0) 

2849 self.assertEqual(query2.count(exact=True), 0) 

2850 self.assertTrue(list(query2.explain_no_results())) 

2851 # These queries yield no results due to various problems that can be 

2852 # spotted prior to execution, yielding helpful diagnostics. 

2853 base_query = registry.queryDataIds(["detector", "physical_filter"]) 

2854 queries_and_snippets = [ 

2855 ( 

2856 # Dataset type name doesn't match any existing dataset types. 

2857 registry.queryDatasets("nonexistent", collections=...), 

2858 ["nonexistent"], 

2859 ), 

2860 ( 

2861 # Dataset type object isn't registered. 

2862 registry.queryDatasets( 

2863 DatasetType( 

2864 "nonexistent", 

2865 dimensions=["instrument"], 

2866 universe=registry.dimensions, 

2867 storageClass="Image", 

2868 ), 

2869 collections=..., 

2870 ), 

2871 ["nonexistent"], 

2872 ), 

2873 ( 

2874 # No datasets of this type in this collection. 

2875 registry.queryDatasets("flat", collections=["biases"]), 

2876 ["flat", "biases"], 

2877 ), 

2878 ( 

2879 # No datasets of this type in this collection. 

2880 base_query.findDatasets("flat", collections=["biases"]), 

2881 ["flat", "biases"], 

2882 ), 

2883 ( 

2884 # No collections matching at all. 

2885 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2886 ["potato"], 

2887 ), 

2888 ] 

2889 with self.assertRaises(MissingDatasetTypeError): 

2890 # Dataset type name doesn't match any existing dataset types. 

2891 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...) 

2892 with self.assertRaises(MissingDatasetTypeError): 

2893 # Dataset type name doesn't match any existing dataset types. 

2894 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...).any() 

2895 with self.assertRaises(DatasetTypeExpressionError): 

2896 # queryDimensionRecords does not allow dataset type wildcards. 

2897 registry.queryDimensionRecords("detector", datasets=["f*"], collections=...).any() 

2898 for query, snippets in queries_and_snippets: 

2899 self.assertFalse(query.any(execute=False, exact=False)) 

2900 self.assertFalse(query.any(execute=True, exact=False)) 

2901 self.assertFalse(query.any(execute=True, exact=True)) 

2902 self.assertEqual(query.count(exact=False), 0) 

2903 self.assertEqual(query.count(exact=True), 0) 

2904 messages = list(query.explain_no_results()) 

2905 self.assertTrue(messages) 

2906 # Want all expected snippets to appear in at least one message. 

2907 self.assertTrue( 

2908 any( 

2909 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2910 ), 

2911 messages, 

2912 ) 

2913 

2914 # Wildcards on dataset types are not permitted in queryDataIds. 

2915 with self.assertRaises(DatasetTypeExpressionError): 

2916 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...) 

2917 

2918 # These queries yield no results due to problems that can be identified 

2919 # by cheap follow-up queries, yielding helpful diagnostics. 

2920 if self.supportsDetailedQueryExplain: 

2921 for query, snippets in [ 

2922 ( 

2923 # No records for one of the involved dimensions. 

2924 registry.queryDataIds(["subfilter"]), 

2925 ["no rows", "subfilter"], 

2926 ), 

2927 ( 

2928 # No records for one of the involved dimensions. 

2929 registry.queryDimensionRecords("subfilter"), 

2930 ["no rows", "subfilter"], 

2931 ), 

2932 ]: 

2933 self.assertFalse(query.any(execute=True, exact=False)) 

2934 self.assertFalse(query.any(execute=True, exact=True)) 

2935 self.assertEqual(query.count(exact=True), 0) 

2936 messages = list(query.explain_no_results()) 

2937 self.assertTrue(messages) 

2938 # Want all expected snippets to appear in at least one message. 

2939 self.assertTrue( 

2940 any( 

2941 all(snippet in message for snippet in snippets) 

2942 for message in query.explain_no_results() 

2943 ), 

2944 messages, 

2945 ) 

2946 

2947 # This query yields four overlaps in the database, but one is filtered 

2948 # out in postprocessing. The count queries aren't accurate because 

2949 # they don't account for duplication that happens due to an internal 

2950 # join against commonSkyPix. 

2951 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2952 self.assertEqual( 

2953 { 

2954 DataCoordinate.standardize( 

2955 instrument="Cam1", 

2956 skymap="SkyMap1", 

2957 visit=v, 

2958 tract=t, 

2959 universe=registry.dimensions, 

2960 ) 

2961 for v, t in [(1, 0), (2, 0), (2, 1)] 

2962 }, 

2963 set(query3), 

2964 ) 

2965 self.assertTrue(query3.any(execute=False, exact=False)) 

2966 self.assertTrue(query3.any(execute=True, exact=False)) 

2967 self.assertTrue(query3.any(execute=True, exact=True)) 

2968 self.assertGreaterEqual(query3.count(exact=False), 4) 

2969 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3) 

2970 self.assertFalse(list(query3.explain_no_results())) 

2971 # This query yields overlaps in the database, but all are filtered 

2972 # out in postprocessing. The count queries again aren't very useful. 

2973 # We have to use `where=` here to avoid an optimization that 

2974 # (currently) skips the spatial postprocess-filtering because it 

2975 # recognizes that no spatial join is necessary. That's not ideal, but 

2976 # fixing it is out of scope for this ticket. 

2977 query4 = registry.queryDataIds( 

2978 ["visit", "tract"], 

2979 instrument="Cam1", 

2980 skymap="SkyMap1", 

2981 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2982 ) 

2983 self.assertFalse(set(query4)) 

2984 self.assertTrue(query4.any(execute=False, exact=False)) 

2985 self.assertTrue(query4.any(execute=True, exact=False)) 

2986 self.assertFalse(query4.any(execute=True, exact=True)) 

2987 self.assertGreaterEqual(query4.count(exact=False), 1) 

2988 self.assertEqual(query4.count(exact=True, discard=True), 0) 

2989 messages = query4.explain_no_results() 

2990 self.assertTrue(messages) 

2991 self.assertTrue(any("overlap" in message for message in messages)) 

2992 # This query should yield results from one dataset type but not the 

2993 # other, which is not registered. 

2994 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"]) 

2995 self.assertTrue(set(query5)) 

2996 self.assertTrue(query5.any(execute=False, exact=False)) 

2997 self.assertTrue(query5.any(execute=True, exact=False)) 

2998 self.assertTrue(query5.any(execute=True, exact=True)) 

2999 self.assertGreaterEqual(query5.count(exact=False), 1) 

3000 self.assertGreaterEqual(query5.count(exact=True), 1) 

3001 self.assertFalse(list(query5.explain_no_results())) 

3002 # This query applies a selection that yields no results, fully in the 

3003 # database. Explaining why it fails involves traversing the relation 

3004 # tree and running a LIMIT 1 query at each level that has the potential 

3005 # to remove rows. 

3006 query6 = registry.queryDimensionRecords( 

3007 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1" 

3008 ) 

3009 self.assertEqual(query6.count(exact=True), 0) 

3010 self.assertFalse(query6.any()) 

3011 if self.supportsDetailedQueryExplain: 

3012 messages = query6.explain_no_results() 

3013 self.assertTrue(messages) 

3014 self.assertTrue(any("no-purpose" in message for message in messages)) 

3015 
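    # The contract exercised above, condensed into a hypothetical helper:
    # exact=False may overcount because it can skip postprocessing,
    # exact=True pays for an accurate answer, and explain_no_results() is
    # only meaningful once the query is known to be empty.
    def _sketch_diagnose(self, query) -> list[str]:
        if query.any(execute=True, exact=True):
            return []
        # Human-readable reasons the query returned nothing.
        return list(query.explain_no_results())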

3016 def testQueryDataIdsExpressionError(self): 

3017 """Test error checking of 'where' expressions in queryDataIds.""" 

3018 registry = self.makeRegistry() 

3019 self.loadData(registry, "base.yaml") 

3020 bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")} 

3021 with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."): 

3022 registry.queryDataIds(["detector"], where="foo.bar = 12") 

3023 with self.assertRaisesRegex( 

3024 LookupError, "Dimension element name cannot be inferred in this context." 

3025 ): 

3026 registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind) 

3027 

3028 def testQueryDataIdsOrderBy(self): 

3029 """Test order_by and limit on result returned by queryDataIds().""" 

3030 registry = self.makeRegistry() 

3031 self.loadData(registry, "base.yaml") 

3032 self.loadData(registry, "datasets.yaml") 

3033 self.loadData(registry, "spatial.yaml") 

3034 

3035 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

3036 return registry.queryDataIds( 

3037 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

3038 ) 

3039 

3040 Test = namedtuple( 

3041 "testQueryDataIdsOrderByTest", 

3042 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

3043 defaults=(None, None, None), 

3044 ) 

3045 

3046 test_data = ( 

3047 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

3048 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

3049 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

3050 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

3051 Test( 

3052 "tract.id,visit.id", 

3053 "tract,visit", 

3054 ((0, 1), (0, 1), (0, 2)), 

3055 limit=(3,), 

3056 ), 

3057 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

3058 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

3059 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

3060 Test( 

3061 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

3062 ), 

3063 Test( 

3064 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

3065 ), 

3066 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

3067 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

3068 Test( 

3069 "tract,-visit.timespan.begin,visit.timespan.end", 

3070 "tract,visit", 

3071 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

3072 ), 

3073 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

3074 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

3075 Test( 

3076 "tract,detector", 

3077 "tract,detector", 

3078 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

3079 datasets="flat", 

3080 collections="imported_r", 

3081 ), 

3082 Test( 

3083 "tract,detector.full_name", 

3084 "tract,detector", 

3085 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

3086 datasets="flat", 

3087 collections="imported_r", 

3088 ), 

3089 Test( 

3090 "tract,detector.raft,detector.name_in_raft", 

3091 "tract,detector", 

3092 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

3093 datasets="flat", 

3094 collections="imported_r", 

3095 ), 

3096 ) 

3097 

3098 for test in test_data: 

3099 order_by = test.order_by.split(",") 

3100 keys = test.keys.split(",") 

3101 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

3102 if test.limit is not None: 

3103 query = query.limit(*test.limit) 

3104 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

3105 self.assertEqual(dataIds, test.result) 

3106 

3107 # and materialize 

3108 query = do_query(keys).order_by(*order_by) 

3109 if test.limit is not None: 

3110 query = query.limit(*test.limit) 

3111 with self.assertRaises(RelationalAlgebraError): 

3112 with query.materialize(): 

3113 pass 

3114 

3115 # errors in a name 

3116 for order_by in ("", "-"): 

3117 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

3118 list(do_query().order_by(order_by)) 

3119 

3120 for order_by in ("undimension.name", "-undimension.name"): 

3121 with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"): 

3122 list(do_query().order_by(order_by)) 

3123 

3124 for order_by in ("attract", "-attract"): 

3125 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

3126 list(do_query().order_by(order_by)) 

3127 

3128 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

3129 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

3130 

3131 with self.assertRaisesRegex( 

3132 ValueError, 

3133 r"Timespan exists in more than one dimension element \(day_obs, exposure, visit\); " 

3134 r"qualify timespan with specific dimension name\.", 

3135 ): 

3136 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

3137 

3138 with self.assertRaisesRegex( 

3139 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

3140 ): 

3141 list(do_query("tract").order_by("timespan.begin")) 

3142 

3143 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

3144 list(do_query("tract").order_by("tract.timespan.begin")) 

3145 

3146 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

3147 list(do_query("tract").order_by("tract.name")) 

3148 

3149 with self.assertRaisesRegex( 

3150 ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?" 

3151 ): 

3152 list(do_query("visit").order_by("timestamp.begin")) 

3153 
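    # The order_by grammar exercised above: a bare key sorts ascending, a
    # leading '-' reverses it, and dotted forms qualify metadata fields or
    # timespan bounds with a dimension element.  A sketch combining those
    # forms (assuming this test's registry and data):
    def _sketch_order_by_grammar(self, registry):
        return list(
            registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
            .order_by("tract", "-visit.timespan.begin")
            .limit(3)
        )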

3154 def testQueryDataIdsGovernorExceptions(self): 

3155 """Test exceptions raised by queryDataIds() for incorrect governors.""" 

3156 registry = self.makeRegistry() 

3157 self.loadData(registry, "base.yaml") 

3158 self.loadData(registry, "datasets.yaml") 

3159 self.loadData(registry, "spatial.yaml") 

3160 

3161 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs): 

3162 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs) 

3163 

3164 Test = namedtuple( 

3165 "testQueryDataIdExceptionsTest", 

3166 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"), 

3167 defaults=(None, None, None, {}, None, 0), 

3168 ) 

3169 

3170 test_data = ( 

3171 Test("tract,visit", count=6), 

3172 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

3173 Test( 

3174 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError 

3175 ), 

3176 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

3177 Test( 

3178 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError 

3179 ), 

3180 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6), 

3181 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError), 

3182 Test( 

3183 "tract,visit", 

3184 where="instrument=cam AND skymap=map", 

3185 bind={"cam": "Cam1", "map": "SkyMap1"}, 

3186 count=6, 

3187 ), 

3188 Test( 

3189 "tract,visit", 

3190 where="instrument=cam AND skymap=map", 

3191 bind={"cam": "Cam", "map": "SkyMap"}, 

3192 exception=DataIdValueError, 

3193 ), 

3194 ) 

3195 

3196 for test in test_data: 

3197 dimensions = test.dimensions.split(",") 

3198 if test.exception: 

3199 with self.assertRaises(test.exception): 

3200 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() 

3201 else: 

3202 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3203 self.assertEqual(query.count(discard=True), test.count) 

3204 

3205 # and materialize 

3206 if test.exception: 

3207 with self.assertRaises(test.exception): 

3208 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3209 with query.materialize() as materialized: 

3210 materialized.count(discard=True) 

3211 else: 

3212 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3213 with query.materialize() as materialized: 

3214 self.assertEqual(materialized.count(discard=True), test.count) 

3215 
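    # A sketch of the distinction this test relies on: when the registry
    # validates governor values, an unknown value raises DataIdValueError
    # (eagerly or at count()), letting a caller separate "bad instrument
    # name" from "no matching rows".  Illustrative only:
    def _sketch_governor_is_known(self, registry, instrument: str) -> bool:
        try:
            registry.queryDataIds(["detector"], instrument=instrument).count()
        except DataIdValueError:
            return False
        return True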

3216 def testQueryDimensionRecordsOrderBy(self): 

3217 """Test order_by and limit on result returned by 

3218 queryDimensionRecords(). 

3219 """ 

3220 registry = self.makeRegistry() 

3221 self.loadData(registry, "base.yaml") 

3222 self.loadData(registry, "datasets.yaml") 

3223 self.loadData(registry, "spatial.yaml") 

3224 

3225 def do_query(element, datasets=None, collections=None): 

3226 return registry.queryDimensionRecords( 

3227 element, instrument="Cam1", datasets=datasets, collections=collections 

3228 ) 

3229 

3230 query = do_query("detector") 

3231 self.assertEqual(len(list(query)), 4) 

3232 

3233 Test = namedtuple( 

3234 "testQueryDataIdsOrderByTest", 

3235 ("element", "order_by", "result", "limit", "datasets", "collections"), 

3236 defaults=(None, None, None), 

3237 ) 

3238 

3239 test_data = [ 

3240 Test("detector", "detector", (1, 2, 3, 4)), 

3241 Test("detector", "-detector", (4, 3, 2, 1)), 

3242 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)), 

3243 Test("detector", "-detector.purpose", (4,), limit=(1,)), 

3244 Test("visit", "visit", (1, 2)), 

3245 Test("visit", "-visit.id", (2, 1)), 

3246 Test("visit", "zenith_angle", (1, 2)), 

3247 Test("visit", "-visit.name", (2, 1)), 

3248 Test("visit", "day_obs,-visit.timespan.begin", (2, 1)), 

3249 ] 

3250 if self.supportsQueryOffset: 

3251 test_data.append(Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2))) 

3252 

3253 for test in test_data: 

3254 order_by = test.order_by.split(",") 

3255 query = do_query(test.element).order_by(*order_by) 

3256 if test.limit is not None: 

3257 query = query.limit(*test.limit) 

3258 dataIds = tuple(rec.id for rec in query) 

3259 self.assertEqual(dataIds, test.result) 

3260 

3261 # errors in a name 

3262 for order_by in ("", "-"): 

3263 with self.assertRaisesRegex( 

3264 (ValueError, InvalidQueryError), 

3265 "(Empty dimension name in ORDER BY)|(Unrecognized identifier)", 

3266 ): 

3267 list(do_query("detector").order_by(order_by)) 

3268 

3269 for order_by in ("undimension.name", "-undimension.name"): 

3270 with self.assertRaisesRegex( 

3271 (ValueError, InvalidQueryError), 

3272 "(Element name mismatch: 'undimension')|(Unrecognized identifier)", 

3273 ): 

3274 list(do_query("detector").order_by(order_by)) 

3275 

3276 for order_by in ("attract", "-attract"): 

3277 with self.assertRaisesRegex( 

3278 (ValueError, InvalidQueryError), 

3279 "(Field 'attract' does not exist in 'detector'.)|(Unrecognized identifier)", 

3280 ): 

3281 list(do_query("detector").order_by(order_by)) 

3282 

3283 for order_by in ("timestamp.begin", "-timestamp.begin"): 

3284 with self.assertRaisesRegex( 

3285 (ValueError, InvalidQueryError), 

3286 r"(Element name mismatch: 'timestamp' instead of 'visit'; " 

3287 r"perhaps you meant 'timespan.begin'\?)" 

3288 r"|(Unrecognized identifier)", 

3289 ): 

3290 list(do_query("visit").order_by(order_by)) 

3291 

3292 def testQueryDimensionRecordsExceptions(self): 

3293 """Test exceptions raised by queryDimensionRecords().""" 

3294 registry = self.makeRegistry() 

3295 self.loadData(registry, "base.yaml") 

3296 self.loadData(registry, "datasets.yaml") 

3297 self.loadData(registry, "spatial.yaml") 

3298 

3299 result = registry.queryDimensionRecords("detector") 

3300 self.assertEqual(result.count(), 4) 

3301 result = registry.queryDimensionRecords("detector", instrument="Cam1") 

3302 self.assertEqual(result.count(), 4) 

3303 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"}) 

3304 self.assertEqual(result.count(), 4) 

3305 

3306 # Test that values specified in kwargs override those specified in 

3307 # dataId. 

3308 result = registry.queryDimensionRecords( 

3309 "detector", dataId={"instrument": "NotCam1"}, instrument="Cam1" 

3310 ) 

3311 self.assertEqual(result.count(), 4) 

3312 

3313 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'") 

3314 self.assertEqual(result.count(), 4) 

3315 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"}) 

3316 self.assertTrue(result.any()) 

3317 self.assertEqual(result.count(), 4) 

3318 

3319 if self.supportsQueryGovernorValidation: 

3320 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3321 result = registry.queryDimensionRecords("detector", instrument="NotCam1") 

3322 result.count() 

3323 

3324 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3325 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) 

3326 result.count() 

3327 

3328 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3329 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") 

3330 result.count() 

3331 

3332 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3333 result = registry.queryDimensionRecords( 

3334 "detector", where="instrument=instr", bind={"instr": "NotCam1"} 

3335 ) 

3336 result.count() 

3337 

3338 def testDatasetConstrainedDimensionRecordQueries(self): 

3339 """Test that queryDimensionRecords works even when given a dataset 

3340 constraint whose dimensions extend beyond the requested dimension 

3341 element's. 

3342 """ 

3343 registry = self.makeRegistry() 

3344 self.loadData(registry, "base.yaml") 

3345 self.loadData(registry, "datasets.yaml") 

3346 # Query for physical_filter dimension records, using a dataset that 

3347 # has both physical_filter and detector dimensions. 

3348 records = registry.queryDimensionRecords( 

3349 "physical_filter", 

3350 datasets=["flat"], 

3351 collections="imported_r", 

3352 ) 

3353 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"}) 

3354 # Trying to constrain by all dataset types is an error. 

3355 with self.assertRaises(TypeError): 

3356 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r")) 

3357 

3358 def testSkyPixDatasetQueries(self): 

3359 """Test that we can build queries involving skypix dimensions as long 

3360 as a dataset type that uses those dimensions is included. 

3361 """ 

3362 registry = self.makeRegistry() 

3363 self.loadData(registry, "base.yaml") 

3364 dataset_type = DatasetType( 

3365 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int" 

3366 ) 

3367 registry.registerDatasetType(dataset_type) 

3368 run = "r" 

3369 registry.registerRun(run) 

3370 # First try queries where there are no datasets; the concern is whether 

3371 # we can even build and execute these queries without raising, even 

3372 # when "doomed" query shortcuts are in play. 

3373 self.assertFalse( 

3374 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)) 

3375 ) 

3376 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run))) 

3377 # Now add a dataset and see that we can get it back. 

3378 htm7 = registry.dimensions.skypix["htm"][7].pixelization 

3379 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0]) 

3380 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run) 

3381 self.assertEqual( 

3382 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)), 

3383 {data_id}, 

3384 ) 

3385 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref}) 

3386 

3387 def testDatasetIdFactory(self): 

3388 """Simple test for DatasetIdFactory, mostly to catch potential changes 

3389 in its API. 

3390 """ 

3391 registry = self.makeRegistry() 

3392 factory = DatasetIdFactory() 

3393 dataset_type = DatasetType( 

3394 "datasetType", 

3395 dimensions=["detector", "instrument"], 

3396 universe=registry.dimensions, 

3397 storageClass="int", 

3398 ) 

3399 run = "run" 

3400 data_id = DataCoordinate.standardize( 

3401 instrument="Cam1", detector=1, dimensions=dataset_type.dimensions 

3402 ) 

3403 

3404 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE) 

3405 self.assertIsInstance(datasetId, uuid.UUID) 

3406 self.assertEqual(datasetId.version, 4) 

3407 

3408 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE) 

3409 self.assertIsInstance(datasetId, uuid.UUID) 

3410 self.assertEqual(datasetId.version, 5) 

3411 

3412 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN) 

3413 self.assertIsInstance(datasetId, uuid.UUID) 

3414 self.assertEqual(datasetId.version, 5) 

3415 
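    # The version assertions above reflect stdlib semantics: UNIQUE mode
    # yields random version-4 UUIDs, while the DATAID_TYPE* modes yield
    # deterministic, name-based version-5 UUIDs.  A sketch with plain stdlib
    # calls (the namespace below is a hypothetical stand-in):
    @staticmethod
    def _sketch_uuid_versions() -> None:
        namespace = uuid.uuid4()
        assert uuid.uuid4().version == 4
        assert uuid.uuid5(namespace, "run/datasetType/dataId").version == 5
        # Name-based IDs are reproducible; random ones are not.
        assert uuid.uuid5(namespace, "x") == uuid.uuid5(namespace, "x")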

3416 def testExposureQueries(self): 

3417 """Test query methods using arguments sourced from the exposure log 

3418 service. 

3419 

3420 The most complete test dataset currently available to daf_butler tests 

3421 is the hsc-rc2-subset.yaml export (which is unfortunately distinct from 

3422 the lsst/rc2_subset GitHub repo), but that does not have 'exposure' 

3423 dimension records as it was focused on providing nontrivial spatial 

3424 overlaps between visit+detector and tract+patch. So in this test we 

3425 need to translate queries that originally used the exposure dimension 

3426 to use the (very similar) visit dimension instead. 

3427 """ 

3428 registry = self.makeRegistry() 

3429 self.loadData(registry, "hsc-rc2-subset.yaml") 

3430 self.assertEqual( 

3431 [ 

3432 record.id 

3433 for record in registry.queryDimensionRecords("visit", instrument="HSC") 

3434 .order_by("visit") 

3435 .limit(5) 

3436 ], 

3437 [318, 322, 326, 330, 332], 

3438 ) 

3439 self.assertEqual( 

3440 [ 

3441 data_id["visit"] 

3442 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("visit").limit(5) 

3443 ], 

3444 [318, 322, 326, 330, 332], 

3445 ) 

3446 self.assertEqual( 

3447 [ 

3448 record.id 

3449 for record in registry.queryDimensionRecords("detector", instrument="HSC") 

3450 .order_by("full_name") 

3451 .limit(5) 

3452 ], 

3453 [73, 72, 71, 70, 65], 

3454 ) 

3455 self.assertEqual( 

3456 [ 

3457 data_id["detector"] 

3458 for data_id in registry.queryDataIds(["detector"], instrument="HSC") 

3459 .order_by("full_name") 

3460 .limit(5) 

3461 ], 

3462 [73, 72, 71, 70, 65], 

3463 ) 

3464 

3465 def test_long_query_names(self) -> None: 

3466 """Test that queries involving very long names are handled correctly. 

3467 

3468 This is especially important for PostgreSQL, which truncates symbols 

3469 longer than 63 characters, but it's worth testing for all DBs. 

3470 """ 

3471 registry = self.makeRegistry() 

3472 name = "abcd" * 17 

3473 registry.registerDatasetType( 

3474 DatasetType( 

3475 name, 

3476 dimensions=(), 

3477 storageClass="Exposure", 

3478 universe=registry.dimensions, 

3479 ) 

3480 ) 

3481 # Need to search more than one collection actually containing a 

3482 # matching dataset to avoid optimizations that sidestep bugs due to 

3483 # truncation by making findFirst=True a no-op. 

3484 run1 = "run1" 

3485 registry.registerRun(run1) 

3486 run2 = "run2" 

3487 registry.registerRun(run2) 

3488 (ref1,) = registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run1) 

3489 registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run2) 

3490 self.assertEqual( 

3491 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)), 

3492 {ref1}, 

3493 ) 

3494 
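    # A sketch of the failure mode this test guards against (hypothetical
    # names, assuming PostgreSQL's 63-byte identifier limit): two generated
    # symbols that differ only past the truncation point collide.
    @staticmethod
    def _sketch_truncation_collision() -> bool:
        limit = 63  # NAMEDATALEN - 1 in a default PostgreSQL build
        name_a = ("abcd" * 17) + "_run1"
        name_b = ("abcd" * 17) + "_run2"
        # Distinct names become identical once truncated.
        return name_a[:limit] == name_b[:limit]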

3495 def test_skypix_constraint_queries(self) -> None: 

3496 """Test queries spatially constrained by a skypix data ID.""" 

3497 registry = self.makeRegistry() 

3498 self.loadData(registry, "hsc-rc2-subset.yaml") 

3499 patch_regions = { 

3500 (data_id["tract"], data_id["patch"]): data_id.region 

3501 for data_id in registry.queryDataIds(["patch"]).expanded() 

3502 } 

3503 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"] 

3504 # This check ensures the test doesn't become trivial due to a config 

3505 # change; if it does, just pick a different HTM level. 

3506 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix) 

3507 # Gather all skypix IDs that definitely overlap at least one of these 

3508 # patches. 

3509 relevant_skypix_ids = lsst.sphgeom.RangeSet() 

3510 for patch_region in patch_regions.values(): 

3511 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region) 

3512 # Look for a "nontrivial" skypix_id that overlaps at least one patch 

3513 # and does not overlap at least one other patch. 

3514 for skypix_id in itertools.chain.from_iterable( 

3515 range(begin, end) for begin, end in relevant_skypix_ids 

3516 ): 

3517 skypix_region = skypix_dimension.pixelization.pixel(skypix_id) 

3518 overlapping_patches = { 

3519 patch_key 

3520 for patch_key, patch_region in patch_regions.items() 

3521 if not patch_region.isDisjointFrom(skypix_region) 

3522 } 

3523 if overlapping_patches and overlapping_patches != patch_regions.keys(): 

3524 break 

3525 else: 

3526 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.") 

3527 self.assertEqual( 

3528 { 

3529 (data_id["tract"], data_id["patch"]) 

3530 for data_id in registry.queryDataIds( 

3531 ["patch"], 

3532 dataId={skypix_dimension.name: skypix_id}, 

3533 ) 

3534 }, 

3535 overlapping_patches, 

3536 ) 

3537 # Test that a three-way join that includes the common skypix system in 

3538 # the dimensions doesn't generate redundant join terms in the query. 

3539 full_data_ids = set( 

3540 registry.queryDataIds( 

3541 ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC" 

3542 ).expanded() 

3543 ) 

3544 self.assertGreater(len(full_data_ids), 0) 

3545 for data_id in full_data_ids: 

3546 self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region)) 

3547 self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region)) 

3548 
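    # lsst.sphgeom.RangeSet iterates as (begin, end) half-open ranges of
    # pixel IDs, which is why the search above chains range(begin, end) to
    # visit individual skypix IDs.  A sketch of that enumeration (assuming
    # only the RangeSet behavior used above):
    @staticmethod
    def _sketch_enumerate_pixels(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
        for begin, end in ranges:
            yield from range(begin, end)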

3549 def test_spatial_constraint_queries(self) -> None: 

3550 """Test queries in which one spatial dimension in the constraint (data 

3551 ID or ``where`` string) constrains a different spatial dimension in the 

3552 query result columns. 

3553 """ 

3554 registry = self.makeRegistry() 

3555 self.loadData(registry, "hsc-rc2-subset.yaml") 

3556 patch_regions = { 

3557 (data_id["tract"], data_id["patch"]): data_id.region 

3558 for data_id in registry.queryDataIds(["patch"]).expanded() 

3559 } 

3560 observation_regions = { 

3561 (data_id["visit"], data_id["detector"]): data_id.region 

3562 for data_id in registry.queryDataIds(["visit", "detector"]).expanded() 

3563 } 

3564 all_combos = { 

3565 (patch_key, observation_key) 

3566 for patch_key, observation_key in itertools.product(patch_regions, observation_regions) 

3567 } 

3568 overlapping_combos = { 

3569 (patch_key, observation_key) 

3570 for patch_key, observation_key in all_combos 

3571 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key]) 

3572 } 

3573 # Check a direct spatial join with no constraint first. 

3574 self.assertEqual( 

3575 { 

3576 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"])) 

3577 for data_id in registry.queryDataIds(["patch", "visit", "detector"]) 

3578 }, 

3579 overlapping_combos, 

3580 ) 

3581 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set) 

3582 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set) 

3583 for patch_key, observation_key in overlapping_combos: 

3584 overlaps_by_patch[patch_key].add(observation_key) 

3585 overlaps_by_observation[observation_key].add(patch_key) 

3586 # Find patches and observations that each overlap at least one of the 

3587 # other kind, but not all of them. 

3588 nontrivial_patch = next( 

3589 iter( 

3590 patch_key 

3591 for patch_key, observation_keys in overlaps_by_patch.items() 

3592 if observation_keys and observation_keys != observation_regions.keys() 

3593 ) 

3594 ) 

3595 nontrivial_observation = next( 

3596 iter( 

3597 observation_key 

3598 for observation_key, patch_keys in overlaps_by_observation.items() 

3599 if patch_keys and patch_keys != patch_regions.keys() 

3600 ) 

3601 ) 

3602 # Use the nontrivial patches and observations as constraints on the 

3603 # other dimensions in various ways, first via a 'where' expression. 

3604 # It's better in general to use 'bind' instead of f-strings, but these 

3605 # are all integers so there are no quoting concerns. 

3606 self.assertEqual( 

3607 { 

3608 (data_id["visit"], data_id["detector"]) 

3609 for data_id in registry.queryDataIds( 

3610 ["visit", "detector"], 

3611 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}", 

3612 skymap="hsc_rings_v1", 

3613 ) 

3614 }, 

3615 overlaps_by_patch[nontrivial_patch], 

3616 ) 

3617 self.assertEqual( 

3618 { 

3619 (data_id["tract"], data_id["patch"]) 

3620 for data_id in registry.queryDataIds( 

3621 ["patch"], 

3622 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}", 

3623 instrument="HSC", 

3624 ) 

3625 }, 

3626 overlaps_by_observation[nontrivial_observation], 

3627 ) 

3628 # and then via the dataId argument. 

3629 self.assertEqual( 

3630 { 

3631 (data_id["visit"], data_id["detector"]) 

3632 for data_id in registry.queryDataIds( 

3633 ["visit", "detector"], 

3634 dataId={ 

3635 "tract": nontrivial_patch[0], 

3636 "patch": nontrivial_patch[1], 

3637 }, 

3638 skymap="hsc_rings_v1", 

3639 ) 

3640 }, 

3641 overlaps_by_patch[nontrivial_patch], 

3642 ) 

3643 self.assertEqual( 

3644 { 

3645 (data_id["tract"], data_id["patch"]) 

3646 for data_id in registry.queryDataIds( 

3647 ["patch"], 

3648 dataId={ 

3649 "visit": nontrivial_observation[0], 

3650 "detector": nontrivial_observation[1], 

3651 }, 

3652 instrument="HSC", 

3653 ) 

3654 }, 

3655 overlaps_by_observation[nontrivial_observation], 

3656 ) 

3657 

3658 def test_query_projection_drop_postprocessing(self) -> None: 

3659 """Test that projections and deduplications on query objects can 

3660 drop post-query region filtering to ensure the query remains in 

3661 the SQL engine. 

3662 """ 

3663 registry = self.makeRegistry() 

3664 self.loadData(registry, "base.yaml") 

3665 self.loadData(registry, "spatial.yaml") 

3666 

3667 def pop_transfer(tree: Relation) -> Relation: 

3668 """If a relation tree terminates with a transfer to a new engine, 

3669 return the relation prior to that transfer. If not, return the 

3670 original relation. 

3671 

3672 Parameters 

3673 ---------- 

3674 tree : `Relation` 

3675 The relation tree to modify. 

3676 """ 

3677 match tree: 

3678 case Transfer(target=target): 

3679 return target 

3680 case _: 

3681 return tree 

3682 

3683 # There's no public way to get a Query object yet, so we get one from a 

3684 # DataCoordinateQueryResults private attribute. When a public API is 

3685 # available this test should use it. 

3686 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query 

3687 # We expect this query to terminate in the iteration engine originally, 

3688 # because region-filtering is necessary. 

3689 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine) 

3690 # If we deduplicate, we usually have to do that downstream of the 

3691 # filtering. That means the deduplication has to happen in the 

3692 # iteration engine. 

3693 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine) 

3694 # If we pass drop_postprocessing, we instead drop the region filtering 

3695 # so the deduplication can happen in SQL (though there might still be 

3696 # transfer to iteration at the tail of the tree that we can ignore; 

3697 # that's what the pop_transfer takes care of here). 

3698 self.assertIsInstance( 

3699 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine, 

3700 sql.Engine, 

3701 ) 

3702 

3703 def test_query_find_datasets_drop_postprocessing(self) -> None: 

3704 """Test that DataCoordinateQueryResults.findDatasets avoids commutator 

3705 problems with the FindFirstDataset relation operation. 

3706 """ 

3707 # Setup: load some visit, tract, and patch records, and insert two 

3708 # datasets with dimensions {visit, patch}, with one in each of two 

3709 # RUN collections. 

3710 registry = self.makeRegistry() 

3711 self.loadData(registry, "base.yaml") 

3712 self.loadData(registry, "spatial.yaml") 

3713 storage_class = StorageClass("Warpy") 

3714 registry.storageClasses.registerStorageClass(storage_class) 

3715 dataset_type = DatasetType( 

3716 "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions 

3717 ) 

3718 registry.registerDatasetType(dataset_type) 

3719 (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1) 

3720 registry.registerRun("run1") 

3721 registry.registerRun("run2") 

3722 (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1") 

3723 (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2") 

3724 # Query for the dataset using queryDataIds(...).findDatasets(...) 

3725 # against only one of the two collections. This should work even 

3726 # though the relation returned by queryDataIds ends with 

3727 # iteration-engine region-filtering, because we can recognize before 

3728 # running the query that there is only one collecton to search and 

3729 # hence the (default) findFirst=True is irrelevant, and joining in the 

3730 # dataset query commutes past the iteration-engine postprocessing. 

3731 query1 = registry.queryDataIds( 

3732 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"] 

3733 ) 

3734 self.assertEqual( 

3735 set(query1.findDatasets(dataset_type.name, collections=["run1"])), 

3736 {ref1}, 

3737 ) 

3738 # Query for the dataset using queryDataIds(...).findDatasets(...) 

3739 # against both collections. This can only work if the FindFirstDataset 

3740 # operation can be commuted past the iteration-engine operations into SQL. 

3741 query2 = registry.queryDataIds( 

3742 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"] 

3743 ) 

3744 self.assertEqual( 

3745 set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])), 

3746 {ref2}, 

3747 ) 

3748 

3749 def test_query_empty_collections(self) -> None: 

3750 """Test for registry query methods with empty collections. The methods 

3751 should return empty result set (or None when applicable) and provide 

3752 "doomed" diagnostics. 

3753 """ 

3754 registry = self.makeRegistry() 

3755 self.loadData(registry, "base.yaml") 

3756 self.loadData(registry, "datasets.yaml") 

3757 

3758 # Tests for registry.findDataset() 

3759 with self.assertRaises(NoDefaultCollectionError): 

3760 registry.findDataset("bias", instrument="Cam1", detector=1) 

3761 self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...)) 

3762 self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[])) 

3763 

3764 # Tests for registry.queryDatasets() 

3765 with self.assertRaises(NoDefaultCollectionError): 

3766 registry.queryDatasets("bias") 

3767 self.assertTrue(list(registry.queryDatasets("bias", collections=...))) 

3768 

3769 result = registry.queryDatasets("bias", collections=[]) 

3770 self.assertEqual(len(list(result)), 0) 

3771 messages = list(result.explain_no_results()) 

3772 self.assertTrue(messages) 

3773 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3774 

3775 # Tests for registry.queryDataIds() 

3776 with self.assertRaises(NoDefaultCollectionError): 

3777 registry.queryDataIds("detector", datasets="bias") 

3778 self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...))) 

3779 

3780 result = registry.queryDataIds("detector", datasets="bias", collections=[]) 

3781 self.assertEqual(len(list(result)), 0) 

3782 messages = list(result.explain_no_results()) 

3783 self.assertTrue(messages) 

3784 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3785 

3786 # Tests for registry.queryDimensionRecords() 

3787 with self.assertRaises(NoDefaultCollectionError): 

3788 registry.queryDimensionRecords("detector", datasets="bias") 

3789 self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...))) 

3790 

3791 result = registry.queryDimensionRecords("detector", datasets="bias", collections=[]) 

3792 self.assertEqual(len(list(result)), 0) 

3793 messages = list(result.explain_no_results()) 

3794 self.assertTrue(messages) 

3795 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3796 
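    # The pattern exercised above generalizes: an empty collection list makes
    # a query "doomed" rather than an error, so callers can branch on the
    # diagnostics instead of catching exceptions.  A sketch:
    def _sketch_find_or_explain(self, registry) -> list[DatasetRef]:
        result = registry.queryDatasets("bias", collections=[])
        refs = list(result)
        if not refs:
            for message in result.explain_no_results():
                print(message)
        return refs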

3797 def test_dataset_followup_spatial_joins(self) -> None: 

3798 """Test queryDataIds(...).findRelatedDatasets(...) where a spatial join 

3799 is involved. 

3800 """ 

3801 registry = self.makeRegistry() 

3802 self.loadData(registry, "base.yaml") 

3803 self.loadData(registry, "spatial.yaml") 

3804 pvi_dataset_type = DatasetType( 

3805 "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions 

3806 ) 

3807 registry.registerDatasetType(pvi_dataset_type) 

3808 collection = "datasets" 

3809 registry.registerRun(collection) 

3810 (pvi1,) = registry.insertDatasets( 

3811 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 1}], run=collection 

3812 ) 

3813 (pvi2,) = registry.insertDatasets( 

3814 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 2}], run=collection 

3815 ) 

3816 (pvi3,) = registry.insertDatasets( 

3817 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 3}], run=collection 

3818 ) 

3819 self.assertEqual( 

3820 set( 

3821 registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0) 

3822 .expanded() 

3823 .findRelatedDatasets("pvi", [collection]) 

3824 ), 

3825 { 

3826 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi1), 

3827 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi2), 

3828 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=1), pvi2), 

3829 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi1), 

3830 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi2), 

3831 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi3), 

3832 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=3), pvi2), 

3833 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=4), pvi3), 

3834 }, 

3835 )