# python/lsst/daf/butler/registry/tests/_registry.py
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from ... import ddl

__all__ = ["RegistryTests"]

import datetime
import itertools
import os
import re
import time
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Callable, Iterator
from concurrent.futures import ThreadPoolExecutor
from datetime import timedelta
from threading import Barrier

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ..._dataset_association import DatasetAssociation
from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef
from ..._dataset_type import DatasetType
from ..._exceptions import (
    CollectionTypeError,
    DataIdValueError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
)
from ..._exceptions_legacy import DatasetTypeError
from ..._storage_class import StorageClass
from ..._timespan import Timespan
from ...dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension
from .._collection_summary import CollectionSummary
from .._collection_type import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    ConflictingDefinitionError,
    DatasetTypeExpressionError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from .._registry import Registry
from ..interfaces import ButlerAttributeExistsError
from ..sql_registry import SqlRegistry


class RegistryTests(ABC):
    """Generic tests for the `SqlRegistry` class that can be subclassed to
    generate tests for different configurations.
    """
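
    # A minimal concrete subclass (a hypothetical sketch; `SqliteRegistryTests`
    # and `TESTDIR` are illustrative names, not part of this module) combines
    # this mixin with `unittest.TestCase` and implements the two abstract
    # methods:
    #
    #     class SqliteRegistryTests(RegistryTests, unittest.TestCase):
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(TESTDIR, "data", "registry")
    #
    #         def makeRegistry(self, share_repo_with=None):
    #             # Build a Registry from self.makeRegistryConfig(), returning
    #             # None if sharing is requested but unsupported.
    #             ...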

    collectionsManager: str | None = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: str | dict[str, str] | None = None
    """Name or configuration dictionary of the datasets manager class. If a
    subclass provides a value for this member, it overrides the name
    specified in the default configuration (`str` or `dict`).
    """

    supportsCollectionRegex: bool = True
    """True if the registry class being tested supports regex searches for
    collections."""

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a `RegistryConfig` used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry | None:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with`
            is not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()

    def loadData(self, registry: SqlRegistry, filename: str) -> None:
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to load into.
        filename : `str`
            The name of the file to load.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
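        # Exercise all three access paths of the lazy results object
        # (iteration, count(), and any()) so they are checked consistently.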
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())

    def testOpaque(self):
        """Tests for `SqlRegistry.registerOpaqueTable`,
        `SqlRegistry.insertOpaqueData`, `SqlRegistry.fetchOpaqueData`, and
        `SqlRegistry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
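        # Insert a few rows; the None value checks that a nullable column
        # round-trips through fetchOpaqueData unchanged.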
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test very long IN clause which exceeds sqlite limit on number of
        # parameters. SQLite says the limit is 32k but it looks like it is
        # much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than 1k batch size, first with
        # duplicates, second has matching elements in different batches (after
        # sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `SqlRegistry.registerDatasetType` and
        `SqlRegistry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        differentDimensions = registry.dimensions.conform(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

        # Test some basic queryDatasetTypes functionality
        missing: list[str] = []
        types = registry.queryDatasetTypes(["te*", "notarealdatasettype"], missing=missing)
        self.assertCountEqual([dt.name for dt in types], ["test", "testNoneTemplate"])
        self.assertEqual(missing, ["notarealdatasettype"])

    def testDimensions(self):
        """Tests for `SqlRegistry.insertDimensionData`,
        `SqlRegistry.syncDimensionData`, and `SqlRegistry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", dimensions=dimension.minimal_group)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, dimensions=dimension.minimal_group)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i", dimensions=dimension2.minimal_group
            )
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("day_obs", {"instrument": "DummyCam", "id": 20250101}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            (
                "visit",
                {
                    "instrument": "DummyCam",
                    "id": 42,
                    "name": "fortytwo",
                    "physical_filter": "d-r",
                    "day_obs": 20250101,
                },
            ),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `SqlRegistry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "day_obs",
            {"instrument": "Cam1", "id": 20250101},
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group1"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 1,
                "obs_id": "one",
                "physical_filter": "Cam1-G",
                "group": "group1",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group2"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 2,
                "obs_id": "two",
                "physical_filter": "Cam1-G",
                "group": "group2",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "day_obs": 20250101},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `SqlRegistry.insertDatasets`,
        `SqlRegistry.getDataset`, and `SqlRegistry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `SqlRegistry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, in which two have the right
        # dataset type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that SqlRegistry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that SqlRegistry.removeDatasetType raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(DatasetTypeError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `SqlRegistry._importDatasets` with UUID dataset ID."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing same DatasetRef with different dataset ID is an error
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs, they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make dataset ref with reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
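                # Both DATAID_TYPE and DATAID_TYPE_RUN derive the ID
                # deterministically from the ref contents, so it should be a
                # name-based (version 5) UUID.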
                self.assertEqual(ref1.id.version, 5)
                self.assertEqual(ref1.id, ref.id)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        Components can no longer be found by registry. This test checks
        that this now fails.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        with self.assertRaises(DatasetTypeError):
            registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})

        if self.supportsCollectionRegex:
            # Query for collections matching a regex.
            self.assertCountEqual(
                list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
                ["imported_r", "imported_g"],
            )
            # Query for collections matching a regex or an explicit str.
            self.assertCountEqual(
                list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
                ["imported_r", "imported_g", "chain1"],
            )
        # Same queries as the regex ones above, but using globs instead of
        # regex.
        self.assertCountEqual(
            list(registry.queryCollections("imported_*", flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a glob or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections(["imported_*", "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )

        # Searching for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Searching for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2. That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainCaching(self):
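        """Test that a collection chain can be modified while the collection
        cache is active (regression test for DM-43750).
        """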
        registry = self.makeRegistry()
        with registry.caching_context():
            registry.registerCollection("a")
            registry.registerCollection("chain", CollectionType.CHAINED)
            # There used to be a caching bug (DM-43750) that would throw an
            # exception if you modified a collection chain for a collection
            # that was already in the cache.
            registry.setCollectionChain("chain", ["a"])
            self.assertEqual(list(registry.getCollectionChain("chain")), ["a"])

    def testCollectionChainFlatten(self):
        """Test that `SqlRegistry.setCollectionChain` obeys its 'flatten'
        option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testCollectionChainPrependConcurrency(self):
        """Verify that locking via database row locks is working as
        expected.
        """

        def blocked_thread_func(registry: SqlRegistry):
            # This call will become blocked after it has decided on positions
            # for the new children in the collection chain, but before
            # inserting them.
            registry._managers.collections.prepend_collection_chain("chain", ["a"])

        def unblocked_thread_func(registry: SqlRegistry):
            registry._managers.collections.prepend_collection_chain("chain", ["b"])

        registry = self._do_collection_concurrency_test(blocked_thread_func, unblocked_thread_func)

        # blocked_thread_func should have finished first, inserting "a".
        # unblocked_thread_func should have finished second, prepending "b".
        self.assertEqual(("b", "a"), registry.getCollectionChain("chain"))

    def testCollectionChainReplaceConcurrency(self):
        """Verify that locking via database row locks is working as
        expected.
        """

        def blocked_thread_func(registry: SqlRegistry):
            # This call will become blocked after deleting children, but before
            # inserting new ones.
            registry.setCollectionChain("chain", ["a"])

        def unblocked_thread_func(registry: SqlRegistry):
            registry.setCollectionChain("chain", ["b"])

        registry = self._do_collection_concurrency_test(blocked_thread_func, unblocked_thread_func)

        # blocked_thread_func should have finished first.
        # unblocked_thread_func should have finished second, overwriting the
        # chain with "b".
        self.assertEqual(("b",), registry.getCollectionChain("chain"))

    def _do_collection_concurrency_test(
        self,
        blocked_thread_func: Callable[[SqlRegistry], None],
        unblocked_thread_func: Callable[[SqlRegistry], None],
    ) -> SqlRegistry:
        # This function:
        # 1. Sets up two registries pointing at the same database.
        # 2. Starts running 'blocked_thread_func' in a background thread,
        #    arranging for it to become blocked during a critical section in
        #    the collections manager.
        # 3. Waits for 'blocked_thread_func' to reach the critical section.
        # 4. Starts running 'unblocked_thread_func'.
        # 5. Allows both functions to run to completion.

        # Set up two registries pointing to the same DB
        registry1 = self.makeRegistry()
        assert isinstance(registry1, SqlRegistry)
        registry2 = self.makeRegistry(share_repo_with=registry1)
        if registry2 is None:
            # This will happen for in-memory SQL databases.
            raise unittest.SkipTest("Testing concurrency requires two connections to the same DB.")

        registry1.registerCollection("chain", CollectionType.CHAINED)
        for collection in ["a", "b"]:
            registry1.registerCollection(collection)

        # Arrange for registry1 to block during its critical section, allowing
        # us to detect this and control when it becomes unblocked.
        enter_barrier = Barrier(2, timeout=60)
        exit_barrier = Barrier(2, timeout=60)

        def wait_for_barrier():
            enter_barrier.wait()
            exit_barrier.wait()

        registry1._managers.collections._block_for_concurrency_test = wait_for_barrier

        with ThreadPoolExecutor(max_workers=1) as exec1:
            with ThreadPoolExecutor(max_workers=1) as exec2:
                future1 = exec1.submit(blocked_thread_func, registry1)
                enter_barrier.wait()

                # At this point registry 1 has entered the critical section and
                # is waiting for us to release it. Start the other thread.
                future2 = exec2.submit(unblocked_thread_func, registry2)
                # thread2 should block inside a database call, but we have no
                # way to detect when it is in this state.
                time.sleep(0.200)

                # Let the threads run to completion.
                exit_barrier.wait()
                future1.result()
                future2.result()

        return registry1
966 def testBasicTransaction(self): 

967 """Test that all operations within a single transaction block are 

968 rolled back if an exception propagates out of the block. 

969 """ 

970 registry = self.makeRegistry() 

971 storageClass = StorageClass("testDatasetType") 

972 registry.storageClasses.registerStorageClass(storageClass) 

973 with registry.transaction(): 

974 registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"}) 

975 with self.assertRaises(ValueError): 

976 with registry.transaction(): 

977 registry.insertDimensionData("instrument", {"name": "Cam2"}) 

978 raise ValueError("Oops, something went wrong") 

979 # Cam1 should exist 

980 self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A") 

981 # But Cam2 and Cam3 should both not exist 

982 with self.assertRaises(DataIdValueError): 

983 registry.expandDataId(instrument="Cam2") 

984 with self.assertRaises(DataIdValueError): 

985 registry.expandDataId(instrument="Cam3") 

986 


    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, dimensions=dimension.minimal_group))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, dimensions=dimension.minimal_group)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap.
        """
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for this test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData("day_obs", dict(instrument="DummyCam", id=20250101))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", day_obs=20250101),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", day_obs=20250101),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", day_obs=20250101),
        )
        registry.insertDimensionData(
            "group",
            dict(instrument="DummyCam", name="ten"),
            dict(instrument="DummyCam", name="eleven"),
            dict(instrument="DummyCam", name="twelve"),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(
                instrument="DummyCam",
                id=100,
                obs_id="100",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=101,
                obs_id="101",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=110,
                obs_id="110",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=111,
                obs_id="111",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=200,
                obs_id="200",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=201,
                obs_id="201",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit=10),
            dict(instrument="DummyCam", exposure=101, visit=10),
            dict(instrument="DummyCam", exposure=110, visit=11),
            dict(instrument="DummyCam", exposure=111, visit=11),
            dict(instrument="DummyCam", exposure=200, visit=20),
            dict(instrument="DummyCam", exposure=201, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.conform(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.conform(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = registry.dimensions.conform(
            rawType.dimensions.required.names | calexpType.dimensions.required.names
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter: it is not in the dimensions, but it
        # is a part of the full expression, so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!"))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = registry.dimensions.conform(
            calexpType.dimensions.required.names
            | mergeType.dimensions.required.names
            | measType.dimensions.required.names
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)
1297 # with empty expression 

1298 rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet() 

1299 self.assertEqual(len(rows), 3 * 4 * 2) # 3 tracts x 4 patches x 2 filters

1300 for dataId in rows: 

1301 self.assertCountEqual(dataId.dimensions.required, ("skymap", "tract", "patch", "band")) 

1302 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5)) 

1303 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7)) 

1304 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r")) 

1305 

1306 # limit to 2 tracts and 2 patches 

1307 rows = registry.queryDataIds( 

1308 dimensions, 

1309 datasets=[calexpType, mergeType], 

1310 collections=run, 

1311 where="tract IN (1, 5) AND patch IN (2, 7)", 

1312 skymap="DummyMap", 

1313 ).toSet() 

1314 self.assertEqual(len(rows), 2 * 2 * 2) # 2 tracts x 2 patches x 2 filters 

1315 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5)) 

1316 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7)) 

1317 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r")) 

1318 

1319 # limit to single filter 

1320 rows = registry.queryDataIds( 

1321 dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'" 

1322 ).toSet() 

1323 self.assertEqual(len(rows), 3 * 4 * 1) # 3 tracts x 4 patches x 1 filter

1324 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5)) 

1325 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7)) 

1326 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",)) 

1327 

1328 # Specifying a non-existent skymap raises an exception.

1329 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

1330 rows = registry.queryDataIds( 

1331 dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'" 

1332 ).toSet() 

1333 

1334 def testSpatialJoin(self): 

1335 """Test queries that involve spatial overlap joins.""" 

1336 registry = self.makeRegistry() 

1337 self.loadData(registry, "hsc-rc2-subset.yaml") 

1338 

1339 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of 

1340 # the TopologicalFamily they belong to. We'll relate all elements in 

1341 # each family to all of the elements in each other family. 

1342 families = defaultdict(set) 

1343 # Dictionary of {element.name: {dataId: region}}. 

1344 regions = {} 

1345 for element in registry.dimensions.database_elements: 

1346 if element.spatial is not None: 

1347 families[element.spatial.name].add(element) 

1348 regions[element.name] = { 

1349 record.dataId: record.region for record in registry.queryDimensionRecords(element) 

1350 } 

1351 

1352 # If this check fails, it's not necessarily a problem - it may just be 

1353 # a reasonable change to the default dimension definitions - but the 

1354 # test below depends on there being more than one family to do anything 

1355 # useful. 

1356 self.assertEqual(len(families), 2) 

1357 

1358 # Overlap DatabaseDimensionElements with each other. 

1359 for family1, family2 in itertools.combinations(families, 2): 

1360 for element1, element2 in itertools.product(families[family1], families[family2]): 

1361 dimensions = element1.minimal_group | element2.minimal_group 

1362 # Construct expected set of overlapping data IDs via a 

1363 # brute-force comparison of the regions we've already fetched. 

1364 expected = { 

1365 DataCoordinate.standardize( 

1366 {**dataId1.required, **dataId2.required}, dimensions=dimensions 

1367 ) 

1368 for (dataId1, region1), (dataId2, region2) in itertools.product( 

1369 regions[element1.name].items(), regions[element2.name].items() 

1370 ) 

1371 if not region1.isDisjointFrom(region2) 

1372 } 

1373 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1374 queried = set(registry.queryDataIds(dimensions)) 

1375 self.assertEqual(expected, queried) 

1376 

1377 # Overlap each DatabaseDimensionElement with the commonSkyPix system. 

1378 commonSkyPix = registry.dimensions.commonSkyPix 

1379 for elementName, these_regions in regions.items(): 

1380 dimensions = registry.dimensions[elementName].minimal_group | commonSkyPix.minimal_group 

1381 expected = set() 

1382 for dataId, region in these_regions.items(): 

1383 for begin, end in commonSkyPix.pixelization.envelope(region): 

1384 expected.update( 

1385 DataCoordinate.standardize( 

1386 {commonSkyPix.name: index, **dataId.required}, dimensions=dimensions 

1387 ) 

1388 for index in range(begin, end) 

1389 ) 

1390 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1391 queried = set(registry.queryDataIds(dimensions)) 

1392 self.assertEqual(expected, queried) 

1393 

1394 def testAbstractQuery(self): 

1395 """Test that we can run a query that just lists the known 

1396 bands. This is tricky because band is 

1397 backed by a query against physical_filter. 

1398 """ 

1399 registry = self.makeRegistry() 

1400 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1401 registry.insertDimensionData( 

1402 "physical_filter", 

1403 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1404 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1405 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1406 ) 

1407 rows = registry.queryDataIds(["band"]).toSet() 

1408 self.assertCountEqual( 

1409 rows, 

1410 [ 

1411 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1412 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1413 ], 

1414 ) 

1415 

1416 def testAttributeManager(self): 

1417 """Test basic functionality of attribute manager.""" 

1418 # number of attributes with schema versions in a fresh database, 

1419 # 6 managers with 2 records per manager, plus config for dimensions 

1420 VERSION_COUNT = 6 * 2 + 1 

1421 

1422 registry = self.makeRegistry() 

1423 attributes = registry._managers.attributes 

1424 

1425 # check what get() returns for non-existing key 

1426 self.assertIsNone(attributes.get("attr")) 

1427 self.assertEqual(attributes.get("attr", ""), "") 

1428 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1429 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1430 

1431 # cannot store empty key or value 

1432 with self.assertRaises(ValueError): 

1433 attributes.set("", "value") 

1434 with self.assertRaises(ValueError): 

1435 attributes.set("attr", "") 

1436 

1437 # set value of non-existing key 

1438 attributes.set("attr", "value") 

1439 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1440 self.assertEqual(attributes.get("attr"), "value") 

1441 

1442 # update value of existing key 

1443 with self.assertRaises(ButlerAttributeExistsError): 

1444 attributes.set("attr", "value2") 

1445 

1446 attributes.set("attr", "value2", force=True) 

1447 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1448 self.assertEqual(attributes.get("attr"), "value2") 

1449 

1450 # delete existing key 

1451 self.assertTrue(attributes.delete("attr")) 

1452 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1453 

1454 # delete non-existing key 

1455 self.assertFalse(attributes.delete("non-attr")) 

1456 

1457 # store a bunch of keys and get the list back

1458 data = [ 

1459 ("version.core", "1.2.3"), 

1460 ("version.dimensions", "3.2.1"), 

1461 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1462 ] 

1463 for key, value in data: 

1464 attributes.set(key, value) 

1465 items = dict(attributes.items()) 

1466 for key, value in data: 

1467 self.assertEqual(items[key], value) 

1468 

1469 def testQueryDatasetsDeduplication(self): 

1470 """Test that the findFirst option to queryDatasets selects datasets 

1471 from collections in the order given.

1472 """ 

1473 registry = self.makeRegistry() 

1474 self.loadData(registry, "base.yaml") 

1475 self.loadData(registry, "datasets.yaml") 

1476 self.assertCountEqual( 

1477 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1478 [ 

1479 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1480 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1481 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1482 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1483 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1484 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1485 ], 

1486 ) 

1487 self.assertCountEqual( 

1488 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1489 [ 

1490 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1491 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1492 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1493 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1494 ], 

1495 ) 

1496 self.assertCountEqual( 

1497 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1498 [ 

1499 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1500 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1501 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1502 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1503 ], 

1504 ) 

1505 

1506 def testQueryResults(self): 

1507 """Test querying for data IDs and then manipulating the QueryResults 

1508 object returned to perform other queries. 

1509 """ 

1510 registry = self.makeRegistry() 

1511 self.loadData(registry, "base.yaml") 

1512 self.loadData(registry, "datasets.yaml") 

1513 bias = registry.getDatasetType("bias") 

1514 flat = registry.getDatasetType("flat") 

1515 # Obtain expected results from methods other than those we're testing 

1516 # here. That includes: 

1517 # - the dimensions of the data IDs we want to query: 

1518 expected_dimensions = registry.dimensions.conform(["detector", "physical_filter"]) 

1519 # - the dimensions of some other data IDs we'll extract from that: 

1520 expected_subset_dimensions = registry.dimensions.conform(["detector"]) 

1521 # - the data IDs we expect to obtain from the first queries: 

1522 expectedDataIds = DataCoordinateSet( 

1523 { 

1524 DataCoordinate.standardize( 

1525 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1526 ) 

1527 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1528 }, 

1529 dimensions=expected_dimensions, 

1530 hasFull=False, 

1531 hasRecords=False, 

1532 ) 

1533 # - the flat datasets we expect to find from those data IDs, in just 

1534 # one collection (so deduplication is irrelevant): 

1535 expectedFlats = [ 

1536 registry.findDataset( 

1537 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1538 ), 

1539 registry.findDataset( 

1540 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1541 ), 

1542 registry.findDataset( 

1543 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1544 ), 

1545 ] 

1546 # - the data IDs we expect to extract from that: 

1547 expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions) 

1548 # - the bias datasets we expect to find from those data IDs, after we 

1549 # subset-out the physical_filter dimension, both with duplicates: 

1550 expectedAllBiases = [ 

1551 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1552 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1553 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1554 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1555 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1556 ] 

1557 # - ...and without duplicates: 

1558 expectedDeduplicatedBiases = [ 

1559 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1560 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1561 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1562 ] 

1563 # Test against those expected results, using a "lazy" query for the 

1564 # data IDs (which re-executes that query each time we use it to do 

1565 # something new). 

1566 dataIds = registry.queryDataIds( 

1567 ["detector", "physical_filter"], 

1568 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1569 instrument="Cam1", 

1570 ) 

1571 self.assertEqual(dataIds.dimensions, expected_dimensions) 

1572 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1573 self.assertCountEqual( 

1574 list( 

1575 dataIds.findDatasets( 

1576 flat, 

1577 collections=["imported_r"], 

1578 ) 

1579 ), 

1580 expectedFlats, 

1581 ) 

1582 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True) 

1583 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1584 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1585 self.assertCountEqual( 

1586 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1587 expectedAllBiases, 

1588 ) 

1589 self.assertCountEqual( 

1590 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1591 expectedDeduplicatedBiases, 

1592 ) 

1593 

1594 # Searching for a dataset with dimensions we had projected away 

1595 # restores those dimensions. 

1596 self.assertCountEqual( 

1597 list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)), 

1598 expectedFlats, 

1599 ) 

1600 

1601 # Use a named dataset type that does not exist and a dataset type 

1602 # object that does not exist. 

1603 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure") 

1604 

1605 # Test both string name and dataset type object. 

1606 test_type: str | DatasetType 

1607 for test_type, test_type_name in ( 

1608 (unknown_type, unknown_type.name), 

1609 (unknown_type.name, unknown_type.name), 

1610 ): 

1611 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name): 

1612 list( 

1613 subsetDataIds.findDatasets( 

1614 test_type, collections=["imported_r", "imported_g"], findFirst=True 

1615 ) 

1616 ) 

1617 

1618 # Materialize the bias dataset queries (only) by putting the results 

1619 # into temporary tables, then repeat those tests. 

1620 with subsetDataIds.findDatasets( 

1621 bias, collections=["imported_r", "imported_g"], findFirst=False 

1622 ).materialize() as biases: 

1623 self.assertCountEqual(list(biases), expectedAllBiases) 

1624 with subsetDataIds.findDatasets( 

1625 bias, collections=["imported_r", "imported_g"], findFirst=True 

1626 ).materialize() as biases: 

1627 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1628 # Materialize the data ID subset query, but not the dataset queries. 

1629 with subsetDataIds.materialize() as subsetDataIds: 

1630 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1631 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1632 self.assertCountEqual( 

1633 list( 

1634 subsetDataIds.findDatasets( 

1635 bias, collections=["imported_r", "imported_g"], findFirst=False 

1636 ) 

1637 ), 

1638 expectedAllBiases, 

1639 ) 

1640 self.assertCountEqual( 

1641 list( 

1642 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1643 ), 

1644 expectedDeduplicatedBiases, 

1645 ) 

1646 # Materialize the dataset queries, too. 

1647 with subsetDataIds.findDatasets( 

1648 bias, collections=["imported_r", "imported_g"], findFirst=False 

1649 ).materialize() as biases: 

1650 self.assertCountEqual(list(biases), expectedAllBiases) 

1651 with subsetDataIds.findDatasets( 

1652 bias, collections=["imported_r", "imported_g"], findFirst=True 

1653 ).materialize() as biases: 

1654 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1655 # Materialize the original query, but none of the follow-up queries. 

1656 with dataIds.materialize() as dataIds: 

1657 self.assertEqual(dataIds.dimensions, expected_dimensions) 

1658 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1659 self.assertCountEqual( 

1660 list( 

1661 dataIds.findDatasets( 

1662 flat, 

1663 collections=["imported_r"], 

1664 ) 

1665 ), 

1666 expectedFlats, 

1667 ) 

1668 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True) 

1669 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1670 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1671 self.assertCountEqual( 

1672 list( 

1673 subsetDataIds.findDatasets( 

1674 bias, collections=["imported_r", "imported_g"], findFirst=False 

1675 ) 

1676 ), 

1677 expectedAllBiases, 

1678 ) 

1679 self.assertCountEqual( 

1680 list( 

1681 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1682 ), 

1683 expectedDeduplicatedBiases, 

1684 ) 

1685 # Materialize just the bias dataset queries. 

1686 with subsetDataIds.findDatasets( 

1687 bias, collections=["imported_r", "imported_g"], findFirst=False 

1688 ).materialize() as biases: 

1689 self.assertCountEqual(list(biases), expectedAllBiases) 

1690 with subsetDataIds.findDatasets( 

1691 bias, collections=["imported_r", "imported_g"], findFirst=True 

1692 ).materialize() as biases: 

1693 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1694 # Materialize the subset data ID query, but not the dataset 

1695 # queries. 

1696 with subsetDataIds.materialize() as subsetDataIds: 

1697 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1698 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1699 self.assertCountEqual( 

1700 list( 

1701 subsetDataIds.findDatasets( 

1702 bias, collections=["imported_r", "imported_g"], findFirst=False 

1703 ) 

1704 ), 

1705 expectedAllBiases, 

1706 ) 

1707 self.assertCountEqual( 

1708 list( 

1709 subsetDataIds.findDatasets( 

1710 bias, collections=["imported_r", "imported_g"], findFirst=True 

1711 ) 

1712 ), 

1713 expectedDeduplicatedBiases, 

1714 ) 

1715 # Materialize the bias dataset queries, too, so now we're 

1716 # materializing every single step. 

1717 with subsetDataIds.findDatasets( 

1718 bias, collections=["imported_r", "imported_g"], findFirst=False 

1719 ).materialize() as biases: 

1720 self.assertCountEqual(list(biases), expectedAllBiases) 

1721 with subsetDataIds.findDatasets( 

1722 bias, collections=["imported_r", "imported_g"], findFirst=True 

1723 ).materialize() as biases: 

1724 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1725 

1726 def testStorageClassPropagation(self): 

1727 """Test that queries for datasets respect the storage class passed in 

1728 as part of a full dataset type. 

1729 """ 

1730 registry = self.makeRegistry() 

1731 self.loadData(registry, "base.yaml") 

1732 dataset_type_in_registry = DatasetType( 

1733 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions 

1734 ) 

1735 registry.registerDatasetType(dataset_type_in_registry) 

1736 run = "run1" 

1737 registry.registerRun(run) 

1738 (inserted_ref,) = registry.insertDatasets( 

1739 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run 

1740 ) 

1741 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry) 

1742 query_dataset_type = DatasetType( 

1743 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions 

1744 ) 

1745 self.assertNotEqual(dataset_type_in_registry, query_dataset_type) 

1746 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run]) 

1747 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore 

1748 (query_datasets_ref,) = query_datasets_result 

1749 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type) 

1750 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets( 

1751 query_dataset_type, collections=[run] 

1752 ) 

1753 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type) 

1754 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result 

1755 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type) 

1756 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type) 

1757 self.assertEqual(list(query_dataset_types_result), [query_dataset_type]) 

1758 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run]) 

1759 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type) 

1760 

1761 def testEmptyDimensionsQueries(self): 

1762 """Test Query and QueryResults objects in the case where there are no 

1763 dimensions. 

1764 """ 

1765 # Set up test data: one dataset type, two runs, one dataset in each. 

1766 registry = self.makeRegistry() 

1767 self.loadData(registry, "base.yaml") 

1768 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1769 registry.registerDatasetType(schema) 

1770 dataId = DataCoordinate.make_empty(registry.dimensions) 

1771 run1 = "run1" 

1772 run2 = "run2" 

1773 registry.registerRun(run1) 

1774 registry.registerRun(run2) 

1775 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1776 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1777 # Query directly for both of the datasets, and each one, one at a time. 

1778 self.checkQueryResults( 

1779 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1780 ) 

1781 self.checkQueryResults( 

1782 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1783 [dataset1], 

1784 ) 

1785 self.checkQueryResults( 

1786 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1787 [dataset2], 

1788 ) 

1789 # Query for data IDs with no dimensions. 

1790 dataIds = registry.queryDataIds([]) 

1791 self.checkQueryResults(dataIds, [dataId]) 

1792 # Use queried data IDs to find the datasets. 

1793 self.checkQueryResults( 

1794 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1795 [dataset1, dataset2], 

1796 ) 

1797 self.checkQueryResults( 

1798 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1799 [dataset1], 

1800 ) 

1801 self.checkQueryResults( 

1802 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1803 [dataset2], 

1804 ) 

1805 # Now materialize the data ID query results and repeat those tests. 

1806 with dataIds.materialize() as dataIds: 

1807 self.checkQueryResults(dataIds, [dataId]) 

1808 self.checkQueryResults( 

1809 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1810 [dataset1], 

1811 ) 

1812 self.checkQueryResults( 

1813 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1814 [dataset2], 

1815 ) 

1816 # Query for non-empty data IDs, then subset that to get the empty one. 

1817 # Repeat the above tests starting from that. 

1818 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1819 self.checkQueryResults(dataIds, [dataId]) 

1820 self.checkQueryResults( 

1821 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1822 [dataset1, dataset2], 

1823 ) 

1824 self.checkQueryResults( 

1825 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1826 [dataset1], 

1827 ) 

1828 self.checkQueryResults( 

1829 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1830 [dataset2], 

1831 ) 

1832 with dataIds.materialize() as dataIds: 

1833 self.checkQueryResults(dataIds, [dataId]) 

1834 self.checkQueryResults( 

1835 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1836 [dataset1, dataset2], 

1837 ) 

1838 self.checkQueryResults( 

1839 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1840 [dataset1], 

1841 ) 

1842 self.checkQueryResults( 

1843 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1844 [dataset2], 

1845 ) 

1846 # Query for non-empty data IDs, then materialize, then subset to get 

1847 # the empty one. Repeat again. 

1848 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1849 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1850 self.checkQueryResults(dataIds, [dataId]) 

1851 self.checkQueryResults( 

1852 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1853 [dataset1, dataset2], 

1854 ) 

1855 self.checkQueryResults( 

1856 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1857 [dataset1], 

1858 ) 

1859 self.checkQueryResults( 

1860 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1861 [dataset2], 

1862 ) 

1863 with dataIds.materialize() as dataIds: 

1864 self.checkQueryResults(dataIds, [dataId]) 

1865 self.checkQueryResults( 

1866 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1867 [dataset1, dataset2], 

1868 ) 

1869 self.checkQueryResults( 

1870 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1871 [dataset1], 

1872 ) 

1873 self.checkQueryResults( 

1874 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1875 [dataset2], 

1876 ) 

1877 # Repeat the materialization tests with a dimension element that isn't 

1878 # cached, so there's no way we can know when building the query whether

1879 # there are any rows or not (there aren't).

1880 dataIds = registry.queryDataIds(["exposure"]).subset(registry.dimensions.empty, unique=True) 

1881 with dataIds.materialize() as dataIds: 

1882 self.checkQueryResults(dataIds, []) 

1883 self.checkQueryResults( 

1884 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), [] 

1885 ) 

1886 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), []) 

1887 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), []) 

1888 # Query for non-empty data IDs with a constraint on an empty-data-ID 

1889 # dataset that exists. 

1890 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...) 

1891 self.checkQueryResults( 

1892 dataIds.subset(unique=True), 

1893 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)], 

1894 ) 

1895 # Again query for non-empty data IDs with a constraint on empty-data-ID 

1896 # datasets, but when the datasets don't exist. We delete the existing 

1897 # dataset and query just that collection rather than creating a new 

1898 # empty collection because this is a bit less likely for our build-time 

1899 # logic to shortcut-out (via the collection summaries), and such a 

1900 # shortcut would make this test a bit more trivial than we'd like. 

1901 registry.removeDatasets([dataset2]) 

1902 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2) 

1903 self.checkQueryResults(dataIds, []) 

1904 

1905 def testDimensionDataModifications(self): 

1906 """Test that modifying dimension records via: 

1907 syncDimensionData(..., update=True) and 

1908 insertDimensionData(..., replace=True) works as expected, even in the 

1909 presence of datasets using those dimensions and spatial overlap 

1910 relationships. 

1911 """ 

1912 

1913 def _unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1914 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1915 for begin, end in ranges: 

1916 yield from range(begin, end) 

1917 

1918 def _range_set_hull( 

1919 ranges: lsst.sphgeom.RangeSet, 

1920 pixelization: lsst.sphgeom.HtmPixelization, 

1921 ) -> lsst.sphgeom.ConvexPolygon: 

1922 """Create a ConvexPolygon hull of the region defined by a set of 

1923 HTM pixelization index ranges. 

1924 """ 

1925 points = [] 

1926 for index in _unpack_range_set(ranges): 

1927 points.extend(pixelization.triangle(index).getVertices()) 

1928 return lsst.sphgeom.ConvexPolygon(points) 

1929 

1930 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1931 # and four child regions (the trixels within the parent at the next 

1932 # level). We'll use the parent as a tract/visit region and the children

1933 # as its patch/visit_detector regions. 

1934 registry = self.makeRegistry() 

1935 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1936 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1937 index = 12288 

1938 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 

1939 assert htm6.universe().contains(child_ranges_small) 

1940 child_regions_small = [htm6.triangle(i) for i in _unpack_range_set(child_ranges_small)] 

1941 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1942 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1943 ) 

1944 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1945 # Make a larger version of each child region, defined to be the set of 

1946 # htm6 trixels that overlap the original's bounding circle. Make a new 

1947 # parent that's the convex hull of the new children. 

1948 child_regions_large = [ 

1949 _range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1950 ] 

1951 assert all( 

1952 large.contains(small) 

1953 for large, small in zip(child_regions_large, child_regions_small, strict=True) 

1954 ) 

1955 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1956 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1957 ) 

1958 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1959 assert parent_region_large.contains(parent_region_small) 

1960 assert not parent_region_small.contains(parent_region_large) 

1961 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1962 # Find some commonSkyPix indices that overlap the large regions but do

1963 # not overlap the small regions. We use commonSkyPix here to make sure the

1964 # real tests later involve what's in the database, not just post-query 

1965 # filtering of regions. 

1966 child_difference_indices = [] 

1967 for large, small in zip(child_regions_large, child_regions_small, strict=True): 

1968 difference = list(_unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1969 assert difference, "if this is empty, we can't test anything useful with these regions" 

1970 assert all( 

1971 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1972 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1973 for d in difference 

1974 ) 

1975 child_difference_indices.append(difference) 

1976 parent_difference_indices = list( 

1977 _unpack_range_set( 

1978 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1979 ) 

1980 ) 

1981 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1982 assert all( 

1983 ( 

1984 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1985 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1986 ) 

1987 for d in parent_difference_indices 

1988 ) 

1989 # Now that we've finally got those regions, we'll insert the large ones 

1990 # as tract/patch dimension records. 

1991 skymap_name = "testing_v1" 

1992 registry.insertDimensionData( 

1993 "skymap", 

1994 { 

1995 "name": skymap_name, 

1996 "hash": bytes([42]), 

1997 "tract_max": 1, 

1998 "patch_nx_max": 2, 

1999 "patch_ny_max": 2, 

2000 }, 

2001 ) 

2002 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

2003 registry.insertDimensionData( 

2004 "patch", 

2005 *[ 

2006 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

2007 for n, c in enumerate(child_regions_large) 

2008 ], 

2009 ) 

2010 # Add a dataset that uses these dimensions to make sure that modifying

2011 # them doesn't disrupt foreign keys (need to make sure DB doesn't 

2012 # implement insert with replace=True as delete-then-insert). 

2013 dataset_type = DatasetType( 

2014 "coadd", 

2015 dimensions=["tract", "patch"], 

2016 universe=registry.dimensions, 

2017 storageClass="Exposure", 

2018 ) 

2019 registry.registerDatasetType(dataset_type) 

2020 registry.registerCollection("the_run", CollectionType.RUN) 

2021 registry.insertDatasets( 

2022 dataset_type, 

2023 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

2024 run="the_run", 

2025 ) 

2026 # Query for tracts and patches that overlap some "difference" commonSkyPix

2027 # pixels; there should be overlaps, because the database has 

2028 # the "large" suite of regions. 

2029 self.assertEqual( 

2030 {0}, 

2031 { 

2032 data_id["tract"] 

2033 for data_id in registry.queryDataIds( 

2034 ["tract"], 

2035 skymap=skymap_name, 

2036 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

2037 ) 

2038 }, 

2039 ) 

2040 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

2041 self.assertIn( 

2042 patch_id, 

2043 { 

2044 data_id["patch"] 

2045 for data_id in registry.queryDataIds( 

2046 ["patch"], 

2047 skymap=skymap_name, 

2048 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

2049 ) 

2050 }, 

2051 ) 

2052 # Use sync to update the tract region and insert to update the regions 

2053 # of the patches, to the "small" suite. 

2054 updated = registry.syncDimensionData( 

2055 "tract", 

2056 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

2057 update=True, 

2058 ) 

2059 self.assertEqual(updated, {"region": parent_region_large}) 

2060 registry.insertDimensionData( 

2061 "patch", 

2062 *[ 

2063 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

2064 for n, c in enumerate(child_regions_small) 

2065 ], 

2066 replace=True, 

2067 ) 

2068 # Query again; there now should be no such overlaps, because the 

2069 # database has the "small" suite of regions. 

2070 self.assertFalse( 

2071 set( 

2072 registry.queryDataIds( 

2073 ["tract"], 

2074 skymap=skymap_name, 

2075 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

2076 ) 

2077 ) 

2078 ) 

2079 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

2080 self.assertNotIn( 

2081 patch_id, 

2082 { 

2083 data_id["patch"] 

2084 for data_id in registry.queryDataIds( 

2085 ["patch"], 

2086 skymap=skymap_name, 

2087 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

2088 ) 

2089 }, 

2090 ) 

2091 # Update back to the large regions and query one more time. 

2092 updated = registry.syncDimensionData( 

2093 "tract", 

2094 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

2095 update=True, 

2096 ) 

2097 self.assertEqual(updated, {"region": parent_region_small}) 

2098 registry.insertDimensionData( 

2099 "patch", 

2100 *[ 

2101 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

2102 for n, c in enumerate(child_regions_large) 

2103 ], 

2104 replace=True, 

2105 ) 

2106 self.assertEqual( 

2107 {0}, 

2108 { 

2109 data_id["tract"] 

2110 for data_id in registry.queryDataIds( 

2111 ["tract"], 

2112 skymap=skymap_name, 

2113 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

2114 ) 

2115 }, 

2116 ) 

2117 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

2118 self.assertIn( 

2119 patch_id, 

2120 { 

2121 data_id["patch"] 

2122 for data_id in registry.queryDataIds( 

2123 ["patch"], 

2124 skymap=skymap_name, 

2125 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

2126 ) 

2127 }, 

2128 ) 

2129 

2130 def testCalibrationCollections(self): 

2131 """Test operations on `~CollectionType.CALIBRATION` collections, 

2132 including `SqlRegistry.certify`, `SqlRegistry.decertify`, 

2133 `SqlRegistry.findDataset`, and 

2134 `DataCoordinateQueryResults.findRelatedDatasets`. 

2135 """ 

2136 # Setup - make a Registry, fill it with some datasets in 

2137 # non-calibration collections. 

2138 registry = self.makeRegistry() 

2139 self.loadData(registry, "base.yaml") 

2140 self.loadData(registry, "datasets.yaml") 

2141 # Set up some timestamps. 

2142 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

2143 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

2144 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

2145 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

2146 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

2147 allTimespans = [ 

2148 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

2149 ] 
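
# None appears at both ends of the sequence above on purpose: with

# combinations(..., r=2) this yields the fully-open Timespan(None, None)

# and both kinds of half-open timespans in addition to all bounded

# [ti, tj) pairs.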

2150 # Insert some exposure records with timespans between each sequential 

2151 # pair of those. 

2152 registry.insertDimensionData( 

2153 "day_obs", {"instrument": "Cam1", "id": 20200101, "timespan": Timespan(t1, t5)} 

2154 ) 

2155 registry.insertDimensionData( 

2156 "group", 

2157 {"instrument": "Cam1", "name": "group0"}, 

2158 {"instrument": "Cam1", "name": "group1"}, 

2159 {"instrument": "Cam1", "name": "group2"}, 

2160 {"instrument": "Cam1", "name": "group3"}, 

2161 ) 

2162 registry.insertDimensionData( 

2163 "exposure", 

2164 { 

2165 "instrument": "Cam1", 

2166 "id": 0, 

2167 "group": "group0", 

2168 "obs_id": "zero", 

2169 "physical_filter": "Cam1-G", 

2170 "day_obs": 20200101, 

2171 "timespan": Timespan(t1, t2), 

2172 }, 

2173 { 

2174 "instrument": "Cam1", 

2175 "id": 1, 

2176 "group": "group1", 

2177 "obs_id": "one", 

2178 "physical_filter": "Cam1-G", 

2179 "day_obs": 20200101, 

2180 "timespan": Timespan(t2, t3), 

2181 }, 

2182 { 

2183 "instrument": "Cam1", 

2184 "id": 2, 

2185 "group": "group2", 

2186 "obs_id": "two", 

2187 "physical_filter": "Cam1-G", 

2188 "day_obs": 20200101, 

2189 "timespan": Timespan(t3, t4), 

2190 }, 

2191 { 

2192 "instrument": "Cam1", 

2193 "id": 3, 

2194 "group": "group3", 

2195 "obs_id": "three", 

2196 "physical_filter": "Cam1-G", 

2197 "day_obs": 20200101, 

2198 "timespan": Timespan(t4, t5), 

2199 }, 

2200 ) 

2201 # Get references to some datasets. 

2202 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

2203 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

2204 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

2205 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

2206 # Register the main calibration collection we'll be working with. 

2207 collection = "Cam1/calibs/default" 

2208 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

2209 # Cannot associate into a calibration collection (no timespan). 

2210 with self.assertRaises(CollectionTypeError): 

2211 registry.associate(collection, [bias2a]) 

2212 # Certify 2a dataset with [t2, t4) validity. 

2213 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

2214 # Test that we can query for this dataset via the new collection, both 

2215 # on its own and with a RUN collection. 

2216 self.assertEqual( 

2217 set(registry.queryDatasets("bias", findFirst=False, collections=collection)), 

2218 {bias2a}, 

2219 ) 

2220 self.assertEqual( 

2221 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])), 

2222 { 

2223 bias2a, 

2224 bias2b, 

2225 bias3b, 

2226 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2227 }, 

2228 ) 

2229 self.assertEqual( 

2230 set(registry.queryDataIds("detector", datasets="bias", collections=collection)), 

2231 {registry.expandDataId(instrument="Cam1", detector=2)}, 

2232 ) 

2233 self.assertEqual( 

2234 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])), 

2235 { 

2236 registry.expandDataId(instrument="Cam1", detector=2), 

2237 registry.expandDataId(instrument="Cam1", detector=3), 

2238 registry.expandDataId(instrument="Cam1", detector=4), 

2239 }, 

2240 ) 

2241 self.assertEqual( 

2242 set( 

2243 registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets( 

2244 "bias", findFirst=True, collections=[collection] 

2245 ) 

2246 ), 

2247 { 

2248 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a), 

2249 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a), 

2250 }, 

2251 ) 

2252 self.assertEqual( 

2253 set( 

2254 registry.queryDataIds( 

2255 ["exposure", "detector"], instrument="Cam1", detector=2 

2256 ).findRelatedDatasets("bias", findFirst=True, collections=[collection, "imported_r"]) 

2257 ), 

2258 { 

2259 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a), 

2260 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a), 

2261 (registry.expandDataId(instrument="Cam1", detector=2, exposure=0), bias2b), 

2262 (registry.expandDataId(instrument="Cam1", detector=2, exposure=3), bias2b), 

2263 }, 

2264 ) 

2265 

2266 # We should not be able to certify 2b with anything overlapping that 

2267 # window. 

2268 with self.assertRaises(ConflictingDefinitionError): 

2269 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

2270 with self.assertRaises(ConflictingDefinitionError): 

2271 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

2272 with self.assertRaises(ConflictingDefinitionError): 

2273 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

2274 with self.assertRaises(ConflictingDefinitionError): 

2275 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

2276 with self.assertRaises(ConflictingDefinitionError): 

2277 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

2278 with self.assertRaises(ConflictingDefinitionError): 

2279 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

2280 with self.assertRaises(ConflictingDefinitionError): 

2281 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

2282 with self.assertRaises(ConflictingDefinitionError): 

2283 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

2284 # We should be able to certify 3a with a range overlapping that window, 

2285 # because it's for a different detector. 

2286 # We'll certify 3a over [t1, t3). 

2287 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

2288 # Now we'll certify 2b and 3b together over [t4, ∞). 

2289 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

2290 

2291 # Fetch all associations and check that they are what we expect. 

2292 self.assertCountEqual( 

2293 list( 

2294 registry.queryDatasetAssociations( 

2295 "bias", 

2296 collections=[collection, "imported_g", "imported_r"], 

2297 ) 

2298 ), 

2299 [ 

2300 DatasetAssociation( 

2301 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

2302 collection="imported_g", 

2303 timespan=None, 

2304 ), 

2305 DatasetAssociation( 

2306 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2307 collection="imported_r", 

2308 timespan=None, 

2309 ), 

2310 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

2311 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

2312 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

2313 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

2314 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

2315 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

2316 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2317 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2318 ], 

2319 ) 

2320 

2321 class Ambiguous: 

2322 """Tag class to denote lookups that should be ambiguous.""" 

2323 

2324 pass 

2325 

2326 def _assertLookup( 

2327 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None 

2328 ) -> None: 

2329 """Local function that asserts that a bias lookup returns the given 

2330 expected result. 

2331 """ 

2332 if expected is Ambiguous: 

2333 with self.assertRaises((DatasetTypeError, LookupError)): 

2334 registry.findDataset( 

2335 "bias", 

2336 collections=collection, 

2337 instrument="Cam1", 

2338 detector=detector, 

2339 timespan=timespan, 

2340 ) 

2341 else: 

2342 self.assertEqual( 

2343 expected, 

2344 registry.findDataset( 

2345 "bias", 

2346 collections=collection, 

2347 instrument="Cam1", 

2348 detector=detector, 

2349 timespan=timespan, 

2350 ), 

2351 ) 

2352 

2353 # Systematically test lookups against expected results. 
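
# A lookup timespan that overlaps exactly one certified validity range

# returns that dataset, one that overlaps none returns None, and one

# that overlaps two different certifications is Ambiguous.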

2354 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2355 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2356 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2357 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2358 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

2359 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2360 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2361 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2362 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2363 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

2364 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2365 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2366 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2367 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

2368 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2369 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

2370 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

2371 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

2372 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

2373 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2374 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2375 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2376 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2377 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2378 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2379 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

2380 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2381 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2382 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2383 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2384 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

2385 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2386 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2387 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2388 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2389 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2390 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2391 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2392 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2393 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2394 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2395 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2396 

2397 # Test lookups via temporal joins to exposures. 

2398 self.assertEqual( 

2399 set( 

2400 registry.queryDataIds( 

2401 ["exposure", "detector"], instrument="Cam1", detector=2 

2402 ).findRelatedDatasets("bias", collections=[collection]) 

2403 ), 

2404 { 

2405 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a), 

2406 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a), 

2407 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b), 

2408 }, 

2409 ) 

2410 self.assertEqual( 

2411 set( 

2412 registry.queryDataIds( 

2413 ["exposure", "detector"], instrument="Cam1", detector=3 

2414 ).findRelatedDatasets("bias", collections=[collection]) 

2415 ), 

2416 { 

2417 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a), 

2418 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a), 

2419 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b), 

2420 }, 

2421 ) 

2422 self.assertEqual( 

2423 set( 

2424 registry.queryDataIds( 

2425 ["exposure", "detector"], instrument="Cam1", detector=2 

2426 ).findRelatedDatasets("bias", collections=[collection, "imported_g"]) 

2427 ), 

2428 { 

2429 (registry.expandDataId(instrument="Cam1", exposure=0, detector=2), bias2a), 

2430 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a), 

2431 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a), 

2432 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b), 

2433 }, 

2434 ) 

2435 self.assertEqual( 

2436 set( 

2437 registry.queryDataIds( 

2438 ["exposure", "detector"], instrument="Cam1", detector=3 

2439 ).findRelatedDatasets("bias", collections=[collection, "imported_g"]) 

2440 ), 

2441 { 

2442 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a), 

2443 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a), 

2444 (registry.expandDataId(instrument="Cam1", exposure=2, detector=3), bias3a), 

2445 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b), 

2446 }, 

2447 ) 

2448 

2449 # Decertify [t3, t5) for all data IDs, and do test lookups again. 

2450 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2451 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2452 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2453 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2454 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2455 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2456 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2457 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2458 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2459 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2460 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2461 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2462 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2463 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2464 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2465 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2466 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2467 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2468 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2469 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2470 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2471 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2472 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2473 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2474 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2475 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2476 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2477 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2478 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2479 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2480 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2481 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2482 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2483 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2484 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2485 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2486 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2487 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2488 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2489 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2490 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2491 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2492 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2493 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2494 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2495 

2496 # Decertify everything, this time with explicit data IDs, then check 

2497 # that no lookups succeed. 

2498 registry.decertify( 

2499 collection, 

2500 "bias", 

2501 Timespan(None, None), 

2502 dataIds=[ 

2503 dict(instrument="Cam1", detector=2), 

2504 dict(instrument="Cam1", detector=3), 

2505 ], 

2506 ) 

2507 for detector in (2, 3): 

2508 for timespan in allTimespans: 

2509 _assertLookup(detector=detector, timespan=timespan, expected=None) 

2510 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2511 # those. 

2512 registry.certify( 

2513 collection, 

2514 [bias2a, bias3a], 

2515 Timespan(None, None), 

2516 ) 

2517 for timespan in allTimespans: 

2518 _assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2519 _assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2520 # Decertify just bias2 over [t2, t4). 

2521 # This should split a single certification row into two (and leave the 

2522 # other existing row, for bias3a, alone). 

2523 registry.decertify( 

2524 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2525 ) 

2526 for timespan in allTimespans: 

2527 _assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2528 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2529 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2530 if overlapsBefore and overlapsAfter: 

2531 expected = Ambiguous 

2532 elif overlapsBefore or overlapsAfter: 

2533 expected = bias2a 

2534 else: 

2535 expected = None 

2536 _assertLookup(detector=2, timespan=timespan, expected=expected) 

2537 
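# Illustrative sketch (not exercised by the suite): the certify/decertify
# bookkeeping pattern tested above, written as a client would call it.
# `collection` and `refs` are assumed to be an existing CALIBRATION
# collection and resolved bias DatasetRefs; the timestamps are hypothetical.
def _example_decertify(registry, collection, refs):
    # Certify over an unbounded validity range...
    registry.certify(collection, refs, Timespan(None, None))
    t_lo = astropy.time.Time("2020-01-01T00:00:00", format="isot", scale="tai")
    t_hi = astropy.time.Time("2020-02-01T00:00:00", format="isot", scale="tai")
    # ...then decertify a sub-interval; this splits the stored validity
    # range in two, so lookups on either side remain unambiguous.
    registry.decertify(collection, "bias", Timespan(t_lo, t_hi))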

2538 def testSkipCalibs(self): 

2539 """Test how queries handle skipping of calibration collections.""" 

2540 registry = self.makeRegistry() 

2541 self.loadData(registry, "base.yaml") 

2542 self.loadData(registry, "datasets.yaml") 

2543 

2544 coll_calib = "Cam1/calibs/default" 

2545 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2546 

2547 # Add all biases to the calibration collection. 

2548 # Without this, the logic that prunes dataset subqueries based on 

2549 # datasetType-collection summary information will fire before the logic 

2550 # we want to test below. This is a good thing (it avoids the dreaded 

2551 # NotImplementedError a bit more often) everywhere but here. 

2552 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2553 

2554 coll_list = [coll_calib, "imported_g", "imported_r"] 

2555 chain = "Cam1/chain" 

2556 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2557 registry.setCollectionChain(chain, coll_list) 

2558 

2559 # explicit list will raise if findFirst=True or there are temporal 

2560 # dimensions 

2561 with self.assertRaises(NotImplementedError): 

2562 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2563 with self.assertRaises(NotImplementedError): 

2564 registry.queryDataIds( 

2565 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2566 ).count() 

2567 

2568 # chain will skip 

2569 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2570 self.assertGreater(len(datasets), 0) 

2571 

2572 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2573 self.assertGreater(len(dataIds), 0) 

2574 

2575 # glob will skip too 

2576 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2577 self.assertGreater(len(datasets), 0) 

2578 

2579 # regular expression will skip too 

2580 pattern = re.compile(".*") 

2581 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2582 self.assertGreater(len(datasets), 0) 

2583 

2584 # ellipsis should work as usual 

2585 datasets = list(registry.queryDatasets("bias", collections=...)) 

2586 self.assertGreater(len(datasets), 0) 

2587 

2588 # few tests with findFirst 

2589 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2590 self.assertGreater(len(datasets), 0) 

2591 
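# Illustrative sketch (not part of the tests): the skip behavior above from
# a client's perspective.  The collection names "calibs" and "everything"
# are hypothetical; "bias" and "imported_g" follow datasets.yaml.
def _example_skip_calibs(registry):
    registry.registerCollection("calibs", type=CollectionType.CALIBRATION)
    registry.certify("calibs", registry.queryDatasets("bias", collections=...), Timespan(None, None))
    registry.registerCollection("everything", type=CollectionType.CHAINED)
    registry.setCollectionChain("everything", ["calibs", "imported_g"])
    # A non-findFirst query through the chain silently skips the calibration
    # collection rather than raising NotImplementedError.
    return list(registry.queryDatasets("bias", collections="everything"))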

2592 def testIngestTimeQuery(self): 

2593 registry = self.makeRegistry() 

2594 self.loadData(registry, "base.yaml") 

2595 dt0 = datetime.datetime.now(datetime.UTC) 

2596 self.loadData(registry, "datasets.yaml") 

2597 dt1 = datetime.datetime.now(datetime.UTC) 

2598 

2599 datasets = list(registry.queryDatasets(..., collections=...)) 

2600 len0 = len(datasets) 

2601 self.assertGreater(len0, 0) 

2602 

2603 where = "ingest_date > T'2000-01-01'" 

2604 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2605 len1 = len(datasets) 

2606 self.assertEqual(len0, len1) 

2607 

2608 # no one will ever use this piece of software in 30 years 

2609 where = "ingest_date > T'2050-01-01'" 

2610 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2611 len2 = len(datasets) 

2612 self.assertEqual(len2, 0) 

2613 

2614 # Check more exact timing to make sure there is no 37-second offset 


2615 # (after fixing DM-30124). SQLite time precision is 1 second, make 

2616 # sure that we don't test with higher precision. 

2617 tests = [ 

2618 # format: (timestamp, operator, expected_len) 

2619 (dt0 - timedelta(seconds=1), ">", len0), 

2620 (dt0 - timedelta(seconds=1), "<", 0), 

2621 (dt1 + timedelta(seconds=1), "<", len0), 

2622 (dt1 + timedelta(seconds=1), ">", 0), 

2623 ] 

2624 for dt, op, expect_len in tests: 

2625 dt_str = dt.isoformat(sep=" ") 

2626 

2627 where = f"ingest_date {op} T'{dt_str}'" 

2628 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2629 self.assertEqual(len(datasets), expect_len) 

2630 

2631 # same with bind using datetime or astropy Time 

2632 where = f"ingest_date {op} ingest_time" 

2633 datasets = list( 

2634 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2635 ) 

2636 self.assertEqual(len(datasets), expect_len) 

2637 

2638 dt_astropy = astropy.time.Time(dt, format="datetime") 

2639 datasets = list( 

2640 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2641 ) 

2642 self.assertEqual(len(datasets), expect_len) 

2643 
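# Illustrative sketch: the two equivalent ways of constraining on
# ingest_date shown above, as a standalone helper.  The cutoff time is
# hypothetical; 'bind' avoids any string-quoting concerns.
def _example_ingest_date(registry):
    cutoff = astropy.time.Time("2024-01-01T00:00:00", format="isot", scale="tai")
    inline = list(
        registry.queryDatasets(..., collections=..., where=f"ingest_date > T'{cutoff.tai.isot}'")
    )
    bound = list(
        registry.queryDatasets(
            ..., collections=..., where="ingest_date > cutoff", bind={"cutoff": cutoff}
        )
    )
    assert len(inline) == len(bound)
    return bound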

2644 def testTimespanQueries(self): 

2645 """Test query expressions involving timespans.""" 

2646 registry = self.makeRegistry() 

2647 self.loadData(registry, "hsc-rc2-subset.yaml") 

2648 # All visits in the database; mapping from ID to timespan. 

2649 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2650 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2651 # visit IDs are monotonically increasing). 

2652 ids = sorted(visits.keys()) 

2653 self.assertGreater(len(ids), 20) 

2654 # Pick some quasi-random indexes into `ids` to play with. 

2655 i1 = int(len(ids) * 0.1) 

2656 i2 = int(len(ids) * 0.3) 

2657 i3 = int(len(ids) * 0.6) 

2658 i4 = int(len(ids) * 0.8) 

2659 # Extract some times from those: just before the beginning of i1 (which 

2660 # should be after the end of the preceding visit), exactly the 

2661 # beginning of i2, just after the beginning of i3 (and before its end), 

2662 # and the exact end of i4. 

2663 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2664 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2665 t2 = visits[ids[i2]].begin 

2666 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2667 self.assertLess(t3, visits[ids[i3]].end) 

2668 t4 = visits[ids[i4]].end 

2669 # Make sure those are actually in order. 

2670 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2671 

2672 bind = { 

2673 "t1": t1, 

2674 "t2": t2, 

2675 "t3": t3, 

2676 "t4": t4, 

2677 "ts23": Timespan(t2, t3), 

2678 } 

2679 

2680 def query(where): 

2681 """Return results as a sorted, deduplicated list of visit IDs. 

2682 

2683 Parameters 

2684 ---------- 

2685 where : `str` 

2686 The WHERE clause for the query. 

2687 """ 

2688 return sorted( 

2689 { 

2690 dataId["visit"] 

2691 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2692 } 

2693 ) 

2694 

2695 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2696 # where they appear in the expression, and how we get the timespan into 

2697 # the expression. 

2698 

2699 # t1 is before the start of i1, so this should not include i1. 

2700 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2701 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2702 # should not include i2. 

2703 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2704 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2705 # t3 is in the middle of i3, so this should include i3. 

2706 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2707 # This one should not include i3, by the same reasoning. 

2708 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2709 # t4 is exactly at the end of i4, so this should include i4. 

2710 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2711 # i4's upper bound of t4 is exclusive, so this should not include i4. 

2712 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2713 

2714 # Now some timespan vs. time scalar queries. 

2715 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2716 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2717 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2718 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2719 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2720 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2721 

2722 # Empty timespans should not overlap anything. 

2723 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 

2724 
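# Illustrative sketch of the interval conventions relied on above:
# Timespan is half-open, [begin, end), and None means unbounded.  The
# times here are hypothetical.
def _example_timespan_semantics():
    t_a = astropy.time.Time("2020-01-01T00:00:00", format="isot", scale="tai")
    t_b = astropy.time.Time("2020-01-02T00:00:00", format="isot", scale="tai")
    assert not Timespan(t_a, t_b).overlaps(Timespan(t_b, None))  # end bound is exclusive
    assert Timespan(None, None).overlaps(Timespan(t_a, t_b))  # unbounded overlaps everything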

2725 def testCollectionSummaries(self): 

2726 """Test recording and retrieval of collection summaries.""" 

2727 self.maxDiff = None 

2728 registry = self.makeRegistry() 

2729 # Importing datasets from yaml should go through the code path where 

2730 # we update collection summaries as we insert datasets. 

2731 self.loadData(registry, "base.yaml") 

2732 self.loadData(registry, "datasets.yaml") 

2733 flat = registry.getDatasetType("flat") 

2734 expected1 = CollectionSummary() 

2735 expected1.dataset_types.add(registry.getDatasetType("bias")) 

2736 expected1.add_data_ids( 

2737 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)] 

2738 ) 

2739 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2740 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2741 # Create a chained collection with both of the imported runs; the 

2742 # summary should be the same, because it's a union with itself. 

2743 chain = "chain" 

2744 registry.registerCollection(chain, CollectionType.CHAINED) 

2745 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2746 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2747 # Associate flats only into a tagged collection and a calibration 

2748 # collection to check summaries of those. 

2749 tag = "tag" 

2750 registry.registerCollection(tag, CollectionType.TAGGED) 

2751 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2752 calibs = "calibs" 

2753 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2754 registry.certify( 

2755 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2756 ) 

2757 expected2 = expected1.copy() 

2758 expected2.dataset_types.discard("bias") 

2759 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2760 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2761 # Explicitly calling SqlRegistry.refresh() should load those same 

2762 # summaries, via a totally different code path. 

2763 registry.refresh() 

2764 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2765 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2766 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2767 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2768 
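# Illustrative sketch: reading a collection summary.  A summary records
# which dataset types and governor-dimension values a collection can
# contain, letting queries prune collections that cannot match.
def _example_summary(registry):
    summary = registry.getCollectionSummary("imported_g")
    for dataset_type in summary.dataset_types:
        print(dataset_type.name)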

2769 def testBindInQueryDatasets(self): 

2770 """Test that the bind parameter is correctly forwarded in 

2771 queryDatasets recursion. 

2772 """ 

2773 registry = self.makeRegistry() 

2774 # Importing datasets from yaml should go through the code path where 

2775 # we update collection summaries as we insert datasets. 

2776 self.loadData(registry, "base.yaml") 

2777 self.loadData(registry, "datasets.yaml") 

2778 self.assertEqual( 

2779 set(registry.queryDatasets("flat", band="r", collections=...)), 

2780 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2781 ) 

2782 

2783 def testQueryIntRangeExpressions(self): 

2784 """Test integer range expressions in ``where`` arguments. 

2785 

2786 Note that our expressions use inclusive stop values, unlike Python's. 

2787 """ 

2788 registry = self.makeRegistry() 

2789 self.loadData(registry, "base.yaml") 

2790 self.assertEqual( 

2791 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")), 

2792 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]}, 

2793 ) 

2794 self.assertEqual( 

2795 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")), 

2796 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]}, 

2797 ) 

2798 self.assertEqual( 

2799 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")), 

2800 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]}, 

2801 ) 

2802 
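# Illustrative sketch: the range syntax accepted in 'where' expressions.
# 'start..stop' is inclusive on both ends (unlike Python's range), and an
# optional ':step' adds a stride, so (1..4:2) matches 1 and 3.
def _example_int_range(registry):
    return set(
        registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")
    )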

2803 def testQueryResultSummaries(self): 

2804 """Test summary methods like `count`, `any`, and `explain_no_results` 

2805 on `DataCoordinateQueryResults` and `DatasetQueryResults`. 

2806 """ 

2807 registry = self.makeRegistry() 

2808 self.loadData(registry, "base.yaml") 

2809 self.loadData(registry, "datasets.yaml") 

2810 self.loadData(registry, "spatial.yaml") 

2811 # Default test dataset has two collections, each with both flats and 

2812 # biases. Add a new collection with only biases. 

2813 registry.registerCollection("biases", CollectionType.TAGGED) 

2814 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2815 # First query yields two results, and involves no postprocessing. 

2816 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2817 self.assertTrue(query1.any(execute=False, exact=False)) 

2818 self.assertTrue(query1.any(execute=True, exact=False)) 

2819 self.assertTrue(query1.any(execute=True, exact=True)) 

2820 self.assertEqual(query1.count(exact=False), 2) 

2821 self.assertEqual(query1.count(exact=True), 2) 

2822 self.assertFalse(list(query1.explain_no_results())) 

2823 # Second query should yield no results, which we should see when 

2824 # we attempt to expand the data ID. 

2825 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2826 # There's no execute=False, exact=False test here because the behavior is 

2827 # not something we want to guarantee in this case (and exact=False 

2828 # says either answer is legal). 

2829 self.assertFalse(query2.any(execute=True, exact=False)) 

2830 self.assertFalse(query2.any(execute=True, exact=True)) 

2831 self.assertEqual(query2.count(exact=False), 0) 

2832 self.assertEqual(query2.count(exact=True), 0) 

2833 self.assertTrue(list(query2.explain_no_results())) 

2834 # These queries yield no results due to various problems that can be 

2835 # spotted prior to execution, yielding helpful diagnostics. 

2836 base_query = registry.queryDataIds(["detector", "physical_filter"]) 

2837 queries_and_snippets = [ 

2838 ( 

2839 # Dataset type name doesn't match any existing dataset types. 

2840 registry.queryDatasets("nonexistent", collections=...), 

2841 ["nonexistent"], 

2842 ), 

2843 ( 

2844 # Dataset type object isn't registered. 

2845 registry.queryDatasets( 

2846 DatasetType( 

2847 "nonexistent", 

2848 dimensions=["instrument"], 

2849 universe=registry.dimensions, 

2850 storageClass="Image", 

2851 ), 

2852 collections=..., 

2853 ), 

2854 ["nonexistent"], 

2855 ), 

2856 ( 

2857 # No datasets of this type in this collection. 

2858 registry.queryDatasets("flat", collections=["biases"]), 

2859 ["flat", "biases"], 

2860 ), 

2861 ( 

2862 # No datasets of this type in this collection. 

2863 base_query.findDatasets("flat", collections=["biases"]), 

2864 ["flat", "biases"], 

2865 ), 

2866 ( 

2867 # No collections matching at all. 

2868 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2869 ["potato"], 

2870 ), 

2871 ] 

2872 with self.assertRaises(MissingDatasetTypeError): 

2873 # Dataset type name doesn't match any existing dataset types. 

2874 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...) 

2875 with self.assertRaises(MissingDatasetTypeError): 

2876 # Dataset type name doesn't match any existing dataset types. 

2877 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...) 

2878 for query, snippets in queries_and_snippets: 

2879 self.assertFalse(query.any(execute=False, exact=False)) 

2880 self.assertFalse(query.any(execute=True, exact=False)) 

2881 self.assertFalse(query.any(execute=True, exact=True)) 

2882 self.assertEqual(query.count(exact=False), 0) 

2883 self.assertEqual(query.count(exact=True), 0) 

2884 messages = list(query.explain_no_results()) 

2885 self.assertTrue(messages) 

2886 # Want all expected snippets to appear in at least one message. 

2887 self.assertTrue( 

2888 any( 

2889 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2890 ), 

2891 messages, 

2892 ) 

2893 

2894 # Wildcards on dataset types are not permitted in queryDataIds. 

2895 with self.assertRaises(DatasetTypeExpressionError): 

2896 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...) 

2897 

2898 # These queries yield no results due to problems that can be identified 

2899 # by cheap follow-up queries, yielding helpful diagnostics. 

2900 for query, snippets in [ 

2901 ( 

2902 # No records for one of the involved dimensions. 

2903 registry.queryDataIds(["subfilter"]), 

2904 ["no rows", "subfilter"], 

2905 ), 

2906 ( 

2907 # No records for one of the involved dimensions. 

2908 registry.queryDimensionRecords("subfilter"), 

2909 ["no rows", "subfilter"], 

2910 ), 

2911 ]: 

2912 self.assertFalse(query.any(execute=True, exact=False)) 

2913 self.assertFalse(query.any(execute=True, exact=True)) 

2914 self.assertEqual(query.count(exact=True), 0) 

2915 messages = list(query.explain_no_results()) 

2916 self.assertTrue(messages) 

2917 # Want all expected snippets to appear in at least one message. 

2918 self.assertTrue( 

2919 any( 

2920 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2921 ), 

2922 messages, 

2923 ) 

2924 

2925 # This query yields four overlaps in the database, but one is filtered 

2926 # out in postprocessing. The count queries aren't accurate because 

2927 # they don't account for duplication that happens due to an internal 

2928 # join against commonSkyPix. 

2929 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2930 self.assertEqual( 

2931 { 

2932 DataCoordinate.standardize( 

2933 instrument="Cam1", 

2934 skymap="SkyMap1", 

2935 visit=v, 

2936 tract=t, 

2937 universe=registry.dimensions, 

2938 ) 

2939 for v, t in [(1, 0), (2, 0), (2, 1)] 

2940 }, 

2941 set(query3), 

2942 ) 

2943 self.assertTrue(query3.any(execute=False, exact=False)) 

2944 self.assertTrue(query3.any(execute=True, exact=False)) 

2945 self.assertTrue(query3.any(execute=True, exact=True)) 

2946 self.assertGreaterEqual(query3.count(exact=False), 4) 

2947 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3) 

2948 self.assertFalse(list(query3.explain_no_results())) 

2949 # This query yields overlaps in the database, but all are filtered 

2950 # out in postprocessing. The count queries again aren't very useful. 

2951 # We have to use `where=` here to avoid an optimization that 

2952 # (currently) skips the spatial postprocess-filtering because it 

2953 # recognizes that no spatial join is necessary. That's not ideal, but 

2954 # fixing it is out of scope for this ticket. 

2955 query4 = registry.queryDataIds( 

2956 ["visit", "tract"], 

2957 instrument="Cam1", 

2958 skymap="SkyMap1", 

2959 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2960 ) 

2961 self.assertFalse(set(query4)) 

2962 self.assertTrue(query4.any(execute=False, exact=False)) 

2963 self.assertTrue(query4.any(execute=True, exact=False)) 

2964 self.assertFalse(query4.any(execute=True, exact=True)) 

2965 self.assertGreaterEqual(query4.count(exact=False), 1) 

2966 self.assertEqual(query4.count(exact=True, discard=True), 0) 

2967 messages = query4.explain_no_results() 

2968 self.assertTrue(messages) 

2969 self.assertTrue(any("overlap" in message for message in messages)) 

2970 # This query should yield results from one dataset type but not the 

2971 # other, which is not registered. 

2972 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"]) 

2973 self.assertTrue(set(query5)) 

2974 self.assertTrue(query5.any(execute=False, exact=False)) 

2975 self.assertTrue(query5.any(execute=True, exact=False)) 

2976 self.assertTrue(query5.any(execute=True, exact=True)) 

2977 self.assertGreaterEqual(query5.count(exact=False), 1) 

2978 self.assertGreaterEqual(query5.count(exact=True), 1) 

2979 self.assertFalse(list(query5.explain_no_results())) 

2980 # This query applies a selection that yields no results, fully in the 

2981 # database. Explaining why it fails involves traversing the relation 

2982 # tree and running a LIMIT 1 query at each level that has the potential 

2983 # to remove rows. 

2984 query6 = registry.queryDimensionRecords( 

2985 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1" 

2986 ) 

2987 self.assertEqual(query6.count(exact=True), 0) 

2988 messages = query6.explain_no_results() 

2989 self.assertTrue(messages) 

2990 self.assertTrue(any("no-purpose" in message for message in messages)) 

2991 
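# Illustrative sketch: the intended client-side use of the summary
# methods exercised above.  execute=False/exact=False allow progressively
# cheaper (possibly approximate) answers; explain_no_results yields
# human-readable diagnostics when a query is doomed.  `query` is assumed
# to be any registry query result object.
def _example_diagnose(query):
    if not query.any(execute=False, exact=False):
        # The cheap check already proves the query is doomed; say why.
        return list(query.explain_no_results())
    return query.count(exact=True, discard=True)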

2992 def testQueryDataIdsExpressionError(self): 

2993 """Test error checking of 'where' expressions in queryDataIds.""" 

2994 registry = self.makeRegistry() 

2995 self.loadData(registry, "base.yaml") 

2996 bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")} 

2997 with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."): 

2998 registry.queryDataIds(["detector"], where="foo.bar = 12") 

2999 with self.assertRaisesRegex( 

3000 LookupError, "Dimension element name cannot be inferred in this context." 

3001 ): 

3002 registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind) 

3003 

3004 def testQueryDataIdsOrderBy(self): 

3005 """Test order_by and limit on result returned by queryDataIds().""" 

3006 registry = self.makeRegistry() 

3007 self.loadData(registry, "base.yaml") 

3008 self.loadData(registry, "datasets.yaml") 

3009 self.loadData(registry, "spatial.yaml") 

3010 

3011 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

3012 return registry.queryDataIds( 

3013 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

3014 ) 

3015 

3016 Test = namedtuple( 

3017 "testQueryDataIdsOrderByTest", 

3018 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

3019 defaults=(None, None, None), 

3020 ) 

3021 

3022 test_data = ( 

3023 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

3024 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

3025 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

3026 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

3027 Test( 

3028 "tract.id,visit.id", 

3029 "tract,visit", 

3030 ((0, 1), (0, 1), (0, 2)), 

3031 limit=(3,), 

3032 ), 

3033 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

3034 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

3035 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

3036 Test( 

3037 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

3038 ), 

3039 Test( 

3040 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

3041 ), 

3042 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

3043 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

3044 Test( 

3045 "tract,-visit.timespan.begin,visit.timespan.end", 

3046 "tract,visit", 

3047 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

3048 ), 

3049 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

3050 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

3051 Test( 

3052 "tract,detector", 

3053 "tract,detector", 

3054 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

3055 datasets="flat", 

3056 collections="imported_r", 

3057 ), 

3058 Test( 

3059 "tract,detector.full_name", 

3060 "tract,detector", 

3061 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

3062 datasets="flat", 

3063 collections="imported_r", 

3064 ), 

3065 Test( 

3066 "tract,detector.raft,detector.name_in_raft", 

3067 "tract,detector", 

3068 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

3069 datasets="flat", 

3070 collections="imported_r", 

3071 ), 

3072 ) 

3073 

3074 for test in test_data: 

3075 order_by = test.order_by.split(",") 

3076 keys = test.keys.split(",") 

3077 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

3078 if test.limit is not None: 

3079 query = query.limit(*test.limit) 

3080 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

3081 self.assertEqual(dataIds, test.result) 

3082 

3083 # and materialize 

3084 query = do_query(keys).order_by(*order_by) 

3085 if test.limit is not None: 

3086 query = query.limit(*test.limit) 

3087 with self.assertRaises(RelationalAlgebraError): 

3088 with query.materialize(): 

3089 pass 

3090 

3091 # errors in a name 

3092 for order_by in ("", "-"): 

3093 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

3094 list(do_query().order_by(order_by)) 

3095 

3096 for order_by in ("undimension.name", "-undimension.name"): 

3097 with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"): 

3098 list(do_query().order_by(order_by)) 

3099 

3100 for order_by in ("attract", "-attract"): 

3101 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

3102 list(do_query().order_by(order_by)) 

3103 

3104 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

3105 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

3106 

3107 with self.assertRaisesRegex( 

3108 ValueError, 

3109 r"Timespan exists in more than one dimension element \(day_obs, exposure, visit\); " 

3110 r"qualify timespan with specific dimension name\.", 

3111 ): 

3112 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

3113 

3114 with self.assertRaisesRegex( 

3115 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

3116 ): 

3117 list(do_query("tract").order_by("timespan.begin")) 

3118 

3119 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

3120 list(do_query("tract").order_by("tract.timespan.begin")) 

3121 

3122 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

3123 list(do_query("tract").order_by("tract.name")) 

3124 

3125 with self.assertRaisesRegex( 

3126 ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?" 

3127 ): 

3128 list(do_query("visit").order_by("timestamp.begin")) 

3129 
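# Illustrative sketch: order_by takes dimension or element.field names,
# with a leading '-' for descending order, and limit takes (limit, offset).
def _example_order_by(registry):
    query = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
    return [
        (data_id["tract"], data_id["visit"])
        for data_id in query.order_by("-tract", "visit.timespan.begin").limit(5, 0)
    ]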

3130 def testQueryDataIdsGovernorExceptions(self): 

3131 """Test exceptions raised by queryDataIds() for incorrect governors.""" 

3132 registry = self.makeRegistry() 

3133 self.loadData(registry, "base.yaml") 

3134 self.loadData(registry, "datasets.yaml") 

3135 self.loadData(registry, "spatial.yaml") 

3136 

3137 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs): 

3138 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs) 

3139 

3140 Test = namedtuple( 

3141 "testQueryDataIdExceptionsTest", 

3142 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"), 

3143 defaults=(None, None, None, {}, None, 0), 

3144 ) 

3145 

3146 test_data = ( 

3147 Test("tract,visit", count=6), 

3148 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

3149 Test( 

3150 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError 

3151 ), 

3152 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

3153 Test( 

3154 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError 

3155 ), 

3156 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6), 

3157 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError), 

3158 Test( 

3159 "tract,visit", 

3160 where="instrument=cam AND skymap=map", 

3161 bind={"cam": "Cam1", "map": "SkyMap1"}, 

3162 count=6, 

3163 ), 

3164 Test( 

3165 "tract,visit", 

3166 where="instrument=cam AND skymap=map", 

3167 bind={"cam": "Cam", "map": "SkyMap"}, 

3168 exception=DataIdValueError, 

3169 ), 

3170 ) 

3171 

3172 for test in test_data: 

3173 dimensions = test.dimensions.split(",") 

3174 if test.exception: 

3175 with self.assertRaises(test.exception): 

3176 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() 

3177 else: 

3178 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3179 self.assertEqual(query.count(discard=True), test.count) 

3180 

3181 # and materialize 

3182 if test.exception: 

3183 with self.assertRaises(test.exception): 

3184 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3185 with query.materialize() as materialized: 

3186 materialized.count(discard=True) 

3187 else: 

3188 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3189 with query.materialize() as materialized: 

3190 self.assertEqual(materialized.count(discard=True), test.count) 

3191 

3192 def testQueryDimensionRecordsOrderBy(self): 

3193 """Test order_by and limit on result returned by 

3194 queryDimensionRecords(). 

3195 """ 

3196 registry = self.makeRegistry() 

3197 self.loadData(registry, "base.yaml") 

3198 self.loadData(registry, "datasets.yaml") 

3199 self.loadData(registry, "spatial.yaml") 

3200 

3201 def do_query(element, datasets=None, collections=None): 

3202 return registry.queryDimensionRecords( 

3203 element, instrument="Cam1", datasets=datasets, collections=collections 

3204 ) 

3205 

3206 query = do_query("detector") 

3207 self.assertEqual(len(list(query)), 4) 

3208 

3209 Test = namedtuple( 

3210 "testQueryDataIdsOrderByTest", 

3211 ("element", "order_by", "result", "limit", "datasets", "collections"), 

3212 defaults=(None, None, None), 

3213 ) 

3214 

3215 test_data = ( 

3216 Test("detector", "detector", (1, 2, 3, 4)), 

3217 Test("detector", "-detector", (4, 3, 2, 1)), 

3218 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)), 

3219 Test("detector", "-detector.purpose", (4,), limit=(1,)), 

3220 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)), 

3221 Test("visit", "visit", (1, 2)), 

3222 Test("visit", "-visit.id", (2, 1)), 

3223 Test("visit", "zenith_angle", (1, 2)), 

3224 Test("visit", "-visit.name", (2, 1)), 

3225 Test("visit", "day_obs,-timespan.begin", (2, 1)), 

3226 ) 

3227 

3228 for test in test_data: 

3229 order_by = test.order_by.split(",") 

3230 query = do_query(test.element).order_by(*order_by) 

3231 if test.limit is not None: 

3232 query = query.limit(*test.limit) 

3233 dataIds = tuple(rec.id for rec in query) 

3234 self.assertEqual(dataIds, test.result) 

3235 

3236 # errors in a name 

3237 for order_by in ("", "-"): 

3238 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

3239 list(do_query("detector").order_by(order_by)) 

3240 

3241 for order_by in ("undimension.name", "-undimension.name"): 

3242 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"): 

3243 list(do_query("detector").order_by(order_by)) 

3244 

3245 for order_by in ("attract", "-attract"): 

3246 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."): 

3247 list(do_query("detector").order_by(order_by)) 

3248 

3249 for order_by in ("timestamp.begin", "-timestamp.begin"): 

3250 with self.assertRaisesRegex( 

3251 ValueError, 

3252 r"Element name mismatch: 'timestamp' instead of 'visit'; " 

3253 r"perhaps you meant 'timespan.begin'\?", 

3254 ): 

3255 list(do_query("visit").order_by(order_by)) 

3256 

3257 def testQueryDimensionRecordsExceptions(self): 

3258 """Test exceptions raised by queryDimensionRecords().""" 

3259 registry = self.makeRegistry() 

3260 self.loadData(registry, "base.yaml") 

3261 self.loadData(registry, "datasets.yaml") 

3262 self.loadData(registry, "spatial.yaml") 

3263 

3264 result = registry.queryDimensionRecords("detector") 

3265 self.assertEqual(result.count(), 4) 

3266 result = registry.queryDimensionRecords("detector", instrument="Cam1") 

3267 self.assertEqual(result.count(), 4) 

3268 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"}) 

3269 self.assertEqual(result.count(), 4) 

3270 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'") 

3271 self.assertEqual(result.count(), 4) 

3272 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"}) 

3273 self.assertEqual(result.count(), 4) 

3274 

3275 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3276 result = registry.queryDimensionRecords("detector", instrument="NotCam1") 

3277 result.count() 

3278 

3279 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3280 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) 

3281 result.count() 

3282 

3283 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3284 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") 

3285 result.count() 

3286 

3287 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3288 result = registry.queryDimensionRecords( 

3289 "detector", where="instrument=instr", bind={"instr": "NotCam1"} 

3290 ) 

3291 result.count() 

3292 

3293 def testDatasetConstrainedDimensionRecordQueries(self): 

3294 """Test that queryDimensionRecords works even when given a dataset 

3295 constraint whose dimensions extend beyond the requested dimension 

3296 element's. 

3297 """ 

3298 registry = self.makeRegistry() 

3299 self.loadData(registry, "base.yaml") 

3300 self.loadData(registry, "datasets.yaml") 

3301 # Query for physical_filter dimension records, using a dataset that 

3302 # has both physical_filter and detector dimensions. 

3303 records = registry.queryDimensionRecords( 

3304 "physical_filter", 

3305 datasets=["flat"], 

3306 collections="imported_r", 

3307 ) 

3308 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"}) 

3309 # Trying to constrain by all dataset types is an error. 

3310 with self.assertRaises(TypeError): 

3311 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r")) 

3312 

3313 def testSkyPixDatasetQueries(self): 

3314 """Test that we can build queries involving skypix dimensions as long 

3315 as a dataset type that uses those dimensions is included. 

3316 """ 

3317 registry = self.makeRegistry() 

3318 self.loadData(registry, "base.yaml") 

3319 dataset_type = DatasetType( 

3320 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int" 

3321 ) 

3322 registry.registerDatasetType(dataset_type) 

3323 run = "r" 

3324 registry.registerRun(run) 

3325 # First try queries where there are no datasets; the concern is whether 

3326 # we can even build and execute these queries without raising, even 

3327 # when "doomed" query shortcuts are in play. 

3328 self.assertFalse( 

3329 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)) 

3330 ) 

3331 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run))) 

3332 # Now add a dataset and see that we can get it back. 

3333 htm7 = registry.dimensions.skypix["htm"][7].pixelization 

3334 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0]) 

3335 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run) 

3336 self.assertEqual( 

3337 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)), 

3338 {data_id}, 

3339 ) 

3340 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref}) 

3341 

3342 def testDatasetIdFactory(self): 

3343 """Simple test for DatasetIdFactory, mostly to catch potential changes 

3344 in its API. 

3345 """ 

3346 registry = self.makeRegistry() 

3347 factory = DatasetIdFactory() 

3348 dataset_type = DatasetType( 

3349 "datasetType", 

3350 dimensions=["detector", "instrument"], 

3351 universe=registry.dimensions, 

3352 storageClass="int", 

3353 ) 

3354 run = "run" 

3355 data_id = DataCoordinate.standardize( 

3356 instrument="Cam1", detector=1, dimensions=dataset_type.dimensions 

3357 ) 

3358 

3359 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE) 

3360 self.assertIsInstance(datasetId, uuid.UUID) 

3361 self.assertEqual(datasetId.version, 4) 

3362 

3363 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE) 

3364 self.assertIsInstance(datasetId, uuid.UUID) 

3365 self.assertEqual(datasetId.version, 5) 

3366 

3367 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN) 

3368 self.assertIsInstance(datasetId, uuid.UUID) 

3369 self.assertEqual(datasetId.version, 5) 

3370 
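# Illustrative sketch: UNIQUE mode yields random (version 4) UUIDs, while
# the DATAID_TYPE* modes hash their inputs into version 5 UUIDs, so the
# same run, dataset type, and data ID always reproduce the same ID.
def _example_deterministic_ids(factory, run, dataset_type, data_id):
    first = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
    second = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
    assert first == second  # deterministic, unlike DatasetIdGenEnum.UNIQUE
    return first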

3371 def testExposureQueries(self): 

3372 """Test query methods using arguments sourced from the exposure log 

3373 service. 

3374 

3375 The most complete test dataset currently available to daf_butler tests 

3376 is the hsc-rc2-subset.yaml export (which is unfortunately distinct from 

3377 the lsst/rc2_subset GitHub repo), but that does not have 'exposure' 

3378 dimension records as it was focused on providing nontrivial spatial 

3379 overlaps between visit+detector and tract+patch. So in this test we 

3380 need to translate queries that originally used the exposure dimension 

3381 to use the (very similar) visit dimension instead. 

3382 """ 

3383 registry = self.makeRegistry() 

3384 self.loadData(registry, "hsc-rc2-subset.yaml") 

3385 self.assertEqual( 

3386 [ 

3387 record.id 

3388 for record in registry.queryDimensionRecords("visit", instrument="HSC") 

3389 .order_by("id") 

3390 .limit(5) 

3391 ], 

3392 [318, 322, 326, 330, 332], 

3393 ) 

3394 self.assertEqual( 

3395 [ 

3396 data_id["visit"] 

3397 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("visit").limit(5) 

3398 ], 

3399 [318, 322, 326, 330, 332], 

3400 ) 

3401 self.assertEqual( 

3402 [ 

3403 record.id 

3404 for record in registry.queryDimensionRecords("detector", instrument="HSC") 

3405 .order_by("full_name") 

3406 .limit(5) 

3407 ], 

3408 [73, 72, 71, 70, 65], 

3409 ) 

3410 self.assertEqual( 

3411 [ 

3412 data_id["detector"] 

3413 for data_id in registry.queryDataIds(["detector"], instrument="HSC") 

3414 .order_by("full_name") 

3415 .limit(5) 

3416 ], 

3417 [73, 72, 71, 70, 65], 

3418 ) 

3419 

3420 def test_long_query_names(self) -> None: 

3421 """Test that queries involving very long names are handled correctly. 

3422 

3423 This is especially important for PostgreSQL, which truncates symbols 

3424 longer than 63 characters, but it's worth testing for all DBs. 

3425 """ 

3426 registry = self.makeRegistry() 

3427 name = "abcd" * 17 

3428 registry.registerDatasetType( 

3429 DatasetType( 

3430 name, 

3431 dimensions=(), 

3432 storageClass="Exposure", 

3433 universe=registry.dimensions, 

3434 ) 

3435 ) 

3436 # Need to search more than one collection actually containing a 

3437 # matching dataset to avoid optimizations that sidestep bugs due to 

3438 # truncation by making findFirst=True a no-op. 

3439 run1 = "run1" 

3440 registry.registerRun(run1) 

3441 run2 = "run2" 

3442 registry.registerRun(run2) 

3443 (ref1,) = registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run1) 

3444 registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run2) 

3445 self.assertEqual( 

3446 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)), 

3447 {ref1}, 

3448 ) 

3449 

3450 def test_skypix_constraint_queries(self) -> None: 

3451 """Test queries spatially constrained by a skypix data ID.""" 

3452 registry = self.makeRegistry() 

3453 self.loadData(registry, "hsc-rc2-subset.yaml") 

3454 patch_regions = { 

3455 (data_id["tract"], data_id["patch"]): data_id.region 

3456 for data_id in registry.queryDataIds(["patch"]).expanded() 

3457 } 

3458 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"] 

3459 # This check ensures the test doesn't become trivial due to a config 

3460 # change; if it does, just pick a different HTM level. 

3461 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix) 

3462 # Gather all skypix IDs that definitely overlap at least one of these 

3463 # patches. 

3464 relevant_skypix_ids = lsst.sphgeom.RangeSet() 

3465 for patch_region in patch_regions.values(): 

3466 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region) 

3467 # Look for a "nontrivial" skypix_id that overlaps at least one patch 

3468 # and does not overlap at least one other patch. 

3469 for skypix_id in itertools.chain.from_iterable( 

3470 range(begin, end) for begin, end in relevant_skypix_ids 

3471 ): 

3472 skypix_region = skypix_dimension.pixelization.pixel(skypix_id) 

3473 overlapping_patches = { 

3474 patch_key 

3475 for patch_key, patch_region in patch_regions.items() 

3476 if not patch_region.isDisjointFrom(skypix_region) 

3477 } 

3478 if overlapping_patches and overlapping_patches != patch_regions.keys(): 

3479 break 

3480 else: 

3481 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.") 

3482 self.assertEqual( 

3483 { 

3484 (data_id["tract"], data_id["patch"]) 

3485 for data_id in registry.queryDataIds( 

3486 ["patch"], 

3487 dataId={skypix_dimension.name: skypix_id}, 

3488 ) 

3489 }, 

3490 overlapping_patches, 

3491 ) 

3492 # Test that a three-way join that includes the common skypix system in 

3493 # the dimensions doesn't generate redundant join terms in the query. 

3494 full_data_ids = set( 

3495 registry.queryDataIds( 

3496 ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC" 

3497 ).expanded() 

3498 ) 

3499 self.assertGreater(len(full_data_ids), 0) 

3500 for data_id in full_data_ids: 

3501 self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region)) 

3502 self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region)) 

3503 
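# Illustrative sketch: the sphgeom machinery used above.  interior()
# returns a RangeSet of pixel IDs wholly contained in a region, and
# pixel() maps an ID back to its region, enabling overlap tests.
# `region` is assumed to be any lsst.sphgeom region.
def _example_skypix_regions(registry, region):
    pixelization = registry.dimensions.skypix["htm"][7].pixelization
    for begin, end in pixelization.interior(region):
        for skypix_id in range(begin, end):
            yield skypix_id, pixelization.pixel(skypix_id)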

3504 def test_spatial_constraint_queries(self) -> None: 

3505 """Test queries in which one spatial dimension in the constraint (data 

3506 ID or ``where`` string) constrains a different spatial dimension in the 

3507 query result columns. 

3508 """ 

3509 registry = self.makeRegistry() 

3510 self.loadData(registry, "hsc-rc2-subset.yaml") 

3511 patch_regions = { 

3512 (data_id["tract"], data_id["patch"]): data_id.region 

3513 for data_id in registry.queryDataIds(["patch"]).expanded() 

3514 } 

3515 observation_regions = { 

3516 (data_id["visit"], data_id["detector"]): data_id.region 

3517 for data_id in registry.queryDataIds(["visit", "detector"]).expanded() 

3518 } 

3519 all_combos = { 

3520 (patch_key, observation_key) 

3521 for patch_key, observation_key in itertools.product(patch_regions, observation_regions) 

3522 } 

3523 overlapping_combos = { 

3524 (patch_key, observation_key) 

3525 for patch_key, observation_key in all_combos 

3526 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key]) 

3527 } 

3528 # Check a direct spatial join with no constraint first. 

3529 self.assertEqual( 

3530 { 

3531 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"])) 

3532 for data_id in registry.queryDataIds(["patch", "visit", "detector"]) 

3533 }, 

3534 overlapping_combos, 

3535 ) 

3536 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set) 

3537 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set) 

3538 for patch_key, observation_key in overlapping_combos: 

3539 overlaps_by_patch[patch_key].add(observation_key) 

3540 overlaps_by_observation[observation_key].add(patch_key) 

3541 # Find patches and observations that overlap at least one of the other 

3542 # kind, but not all of them. 

3543 nontrivial_patch = next( 

3544 iter( 

3545 patch_key 

3546 for patch_key, observation_keys in overlaps_by_patch.items() 

3547 if observation_keys and observation_keys != observation_regions.keys() 

3548 ) 

3549 ) 

3550 nontrivial_observation = next( 

3551 iter( 

3552 observation_key 

3553 for observation_key, patch_keys in overlaps_by_observation.items() 

3554 if patch_keys and patch_keys != patch_regions.keys() 

3555 ) 

3556 ) 

3557 # Use the nontrivial patches and observations as constraints on the 

3558 # other dimensions in various ways, first via a 'where' expression. 

3559 # It's better in general to use 'bind' instead of f-strings, but these 

3560 # are all integers so there are no quoting concerns. 

3561 self.assertEqual( 

3562 { 

3563 (data_id["visit"], data_id["detector"]) 

3564 for data_id in registry.queryDataIds( 

3565 ["visit", "detector"], 

3566 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}", 

3567 skymap="hsc_rings_v1", 

3568 ) 

3569 }, 

3570 overlaps_by_patch[nontrivial_patch], 

3571 ) 

3572 self.assertEqual( 

3573 { 

3574 (data_id["tract"], data_id["patch"]) 

3575 for data_id in registry.queryDataIds( 

3576 ["patch"], 

3577 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}", 

3578 instrument="HSC", 

3579 ) 

3580 }, 

3581 overlaps_by_observation[nontrivial_observation], 

3582 ) 

3583 # and then via the dataId argument. 

3584 self.assertEqual( 

3585 { 

3586 (data_id["visit"], data_id["detector"]) 

3587 for data_id in registry.queryDataIds( 

3588 ["visit", "detector"], 

3589 dataId={ 

3590 "tract": nontrivial_patch[0], 

3591 "patch": nontrivial_patch[1], 

3592 }, 

3593 skymap="hsc_rings_v1", 

3594 ) 

3595 }, 

3596 overlaps_by_patch[nontrivial_patch], 

3597 ) 

3598 self.assertEqual( 

3599 { 

3600 (data_id["tract"], data_id["patch"]) 

3601 for data_id in registry.queryDataIds( 

3602 ["patch"], 

3603 dataId={ 

3604 "visit": nontrivial_observation[0], 

3605 "detector": nontrivial_observation[1], 

3606 }, 

3607 instrument="HSC", 

3608 ) 

3609 }, 

3610 overlaps_by_observation[nontrivial_observation], 

3611 ) 

3612 
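# Illustrative sketch: the spatial-constraint pattern above from the
# client side.  A patch data ID constrains visit+detector purely through
# region overlaps; the tract/patch values here are hypothetical.
def _example_spatial_constraint(registry):
    return {
        (data_id["visit"], data_id["detector"])
        for data_id in registry.queryDataIds(
            ["visit", "detector"],
            dataId={"tract": 0, "patch": 1},
            skymap="hsc_rings_v1",
            instrument="HSC",
        )
    }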

3613 def test_query_projection_drop_postprocessing(self) -> None: 

3614 """Test that projections and deduplications on query objects can 

3615 drop post-query region filtering to ensure the query remains in 

3616 the SQL engine. 

3617 """ 

3618 registry = self.makeRegistry() 

3619 self.loadData(registry, "base.yaml") 

3620 self.loadData(registry, "spatial.yaml") 

3621 

3622 def pop_transfer(tree: Relation) -> Relation: 

3623 """If a relation tree terminates with a transfer to a new engine, 

3624 return the relation prior to that transfer. If not, return the 

3625 original relation. 

3626 

3627 Parameters 

3628 ---------- 

3629 tree : `Relation` 

3630 The relation tree to modify. 

3631 """ 

3632 match tree: 

3633 case Transfer(target=target): 

3634 return target 

3635 case _: 

3636 return tree 

3637 

3638 # There's no public way to get a Query object yet, so we get one from a 

3639 # DataCoordinateQueryResults private attribute. When a public API is 

3640 # available this test should use it. 

3641 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query 

3642 # We expect this query to terminate in the iteration engine originally, 

3643 # because region-filtering is necessary. 

3644 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine) 

3645 # If we deduplicate, we usually have to do that downstream of the 

3646 # filtering. That means the deduplication has to happen in the 

3647 # iteration engine. 

3648 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine) 

3649 # If we pass drop_postprocessing, we instead drop the region filtering 

3650 # so the deduplication can happen in SQL (though there might still be 

3651 # transfer to iteration at the tail of the tree that we can ignore; 

3652 # that's what the pop_transfer takes care of here). 

3653 self.assertIsInstance( 

3654 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine, 

3655 sql.Engine, 

3656 ) 

3657 

3658 def test_query_find_datasets_drop_postprocessing(self) -> None: 

3659 """Test that DataCoordinateQueryResults.findDatasets avoids commutator 

3660 problems with the FindFirstDataset relation operation. 

3661 """ 

3662 # Setup: load some visit, tract, and patch records, and insert two 

3663 # datasets with dimensions {visit, patch}, with one in each of two 

3664 # RUN collections. 

3665 registry = self.makeRegistry() 

3666 self.loadData(registry, "base.yaml") 

3667 self.loadData(registry, "spatial.yaml") 

3668 storage_class = StorageClass("Warpy") 

3669 registry.storageClasses.registerStorageClass(storage_class) 

3670 dataset_type = DatasetType( 

3671 "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions 

3672 ) 

3673 registry.registerDatasetType(dataset_type) 

3674 (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1) 

3675 registry.registerRun("run1") 

3676 registry.registerRun("run2") 

3677 (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1") 

3678 (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2") 

3679 # Query for the dataset using queryDataIds(...).findDatasets(...) 

3680 # against only one of the two collections. This should work even 

3681 # though the relation returned by queryDataIds ends with 

3682 # iteration-engine region-filtering, because we can recognize before 

3683 # running the query that there is only one collection to search and 

3684 # hence the (default) findFirst=True is irrelevant, and joining in the 

3685 # dataset query commutes past the iteration-engine postprocessing. 

3686 query1 = registry.queryDataIds( 

3687 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"] 

3688 ) 

3689 self.assertEqual( 

3690 set(query1.findDatasets(dataset_type.name, collections=["run1"])), 

3691 {ref1}, 

3692 ) 

3693 # Query for the dataset using queryDataIds(...).findDatasets(...) 

3694 # against both collections. This can only work if the FindFirstDataset 

3695 # operation can be commuted past the iteration-engine operations into SQL. 

3696 query2 = registry.queryDataIds( 

3697 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"] 

3698 ) 

3699 self.assertEqual( 

3700 set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])), 

3701 {ref2}, 

3702 ) 

3703 

3704 def test_query_empty_collections(self) -> None: 

3705 """Test for registry query methods with empty collections. The methods 

3706 should return an empty result set (or None when applicable) and provide 

3707 "doomed" diagnostics. 

3708 """ 

3709 registry = self.makeRegistry() 

3710 self.loadData(registry, "base.yaml") 

3711 self.loadData(registry, "datasets.yaml") 

3712 

3713 # Tests for registry.findDataset() 

3714 with self.assertRaises(NoDefaultCollectionError): 

3715 registry.findDataset("bias", instrument="Cam1", detector=1) 

3716 self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...)) 

3717 self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[])) 

3718 

3719 # Tests for registry.queryDatasets() 

3720 with self.assertRaises(NoDefaultCollectionError): 

3721 registry.queryDatasets("bias") 

3722 self.assertTrue(list(registry.queryDatasets("bias", collections=...))) 

3723 

3724 result = registry.queryDatasets("bias", collections=[]) 

3725 self.assertEqual(len(list(result)), 0) 

3726 messages = list(result.explain_no_results()) 

3727 self.assertTrue(messages) 

3728 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3729 

3730 # Tests for registry.queryDataIds() 

3731 with self.assertRaises(NoDefaultCollectionError): 

3732 registry.queryDataIds("detector", datasets="bias") 

3733 self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...))) 

3734 

3735 result = registry.queryDataIds("detector", datasets="bias", collections=[]) 

3736 self.assertEqual(len(list(result)), 0) 

3737 messages = list(result.explain_no_results()) 

3738 self.assertTrue(messages) 

3739 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3740 

3741 # Tests for registry.queryDimensionRecords() 

3742 with self.assertRaises(NoDefaultCollectionError): 

3743 registry.queryDimensionRecords("detector", datasets="bias") 

3744 self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...))) 

3745 

3746 result = registry.queryDimensionRecords("detector", datasets="bias", collections=[]) 

3747 self.assertEqual(len(list(result)), 0) 

3748 messages = list(result.explain_no_results()) 

3749 self.assertTrue(messages) 

3750 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3751 
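# Illustrative sketch: an empty (but present) collection list is legal;
# it produces a doomed query whose diagnostics explain the emptiness,
# while omitting 'collections' entirely raises NoDefaultCollectionError
# when the registry has no defaults.
def _example_empty_collections(registry):
    result = registry.queryDatasets("bias", collections=[])
    assert not list(result)
    return list(result.explain_no_results())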

3752 def test_dataset_followup_spatial_joins(self) -> None: 

3753 """Test queryDataIds(...).findRelatedDatasets(...) where a spatial join 

3754 is involved. 

3755 """ 

3756 registry = self.makeRegistry() 

3757 self.loadData(registry, "base.yaml") 

3758 self.loadData(registry, "spatial.yaml") 

3759 pvi_dataset_type = DatasetType( 

3760 "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions 

3761 ) 

3762 registry.registerDatasetType(pvi_dataset_type) 

3763 collection = "datasets" 

3764 registry.registerRun(collection) 

3765 (pvi1,) = registry.insertDatasets( 

3766 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 1}], run=collection 

3767 ) 

3768 (pvi2,) = registry.insertDatasets( 

3769 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 2}], run=collection 

3770 ) 

3771 (pvi3,) = registry.insertDatasets( 

3772 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 3}], run=collection 

3773 ) 

3774 self.assertEqual( 

3775 set( 

3776 registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0) 

3777 .expanded() 

3778 .findRelatedDatasets("pvi", [collection]) 

3779 ), 

3780 { 

3781 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi1), 

3782 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi2), 

3783 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=1), pvi2), 

3784 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi1), 

3785 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi2), 

3786 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi3), 

3787 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=3), pvi2), 

3788 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=4), pvi3), 

3789 }, 

3790 )