# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from ... import ddl

__all__ = ["RegistryTests"]

import datetime
import itertools
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Iterator
from datetime import timedelta
from typing import TYPE_CHECKING

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ..._dataset_association import DatasetAssociation
from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef
from ..._dataset_type import DatasetType
from ..._storage_class import StorageClass
from ..._timespan import Timespan
from ...dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension
from .._collection_summary import CollectionSummary
from .._collection_type import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    DatasetTypeExpressionError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from ..sql_registry import SqlRegistry


class RegistryTests(ABC):
    """Generic tests for the `SqlRegistry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: str | None = None
    """Name of the collections manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: str | dict[str, str] | None = None
    """Name or configuration dictionary of the datasets manager class.  If a
    subclass provides a value for this member, it overrides the name
    specified in the default configuration (`str` or `dict`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        Returned instance will be pre-configured based on the values of class
        members, and default-configured for all other parameters.  Subclasses
        that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self, share_repo_with: SqlRegistry | None = None) -> SqlRegistry | None:
        """Return the SqlRegistry instance to be tested.

        Parameters
        ----------
        share_repo_with : `SqlRegistry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `SqlRegistry`
            New `SqlRegistry` instance, or `None` *only* if `share_repo_with`
            is not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()
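
    # For illustration, a concrete subclass might look roughly like the
    # hypothetical sketch below; it is not part of this module, and the
    # data-directory layout, the "sqlite://" URI, and the
    # ``SqlRegistry.createFromConfig`` factory are assumptions here:
    #
    #     class SqliteRegistryTests(RegistryTests, unittest.TestCase):
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data", "registry")
    #
    #         def makeRegistry(self, share_repo_with=None):
    #             if share_repo_with is not None:
    #                 # In-memory SQLite databases cannot be shared.
    #                 return None
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"
    #             return SqlRegistry.createFromConfig(config)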

    def loadData(self, registry: SqlRegistry, filename: str) -> None:
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to load into.
        filename : `str`
            The name of the file to load.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
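
    # Typical use of the helper above (illustrative only; the dimension and
    # data ID values here are made up):
    #
    #     self.checkQueryResults(
    #         registry.queryDataIds(["detector"], instrument="Cam1"),
    #         expected=[...],
    #     )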

    def testOpaque(self):
        """Tests for `SqlRegistry.registerOpaqueTable`,
        `SqlRegistry.insertOpaqueData`, `SqlRegistry.fetchOpaqueData`, and
        `SqlRegistry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test very long IN clause which exceeds sqlite limit on number of
        # parameters. SQLite says the limit is 32k but it looks like it is
        # much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
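        # The registry is expected to split such huge IN lists into batches
        # of roughly 1k parameters and OR the per-batch clauses together; a
        # rough sketch of that idea (``ids``, ``clauses``, and ``column`` are
        # hypothetical names, not the actual implementation):
        #
        #     for chunk in itertools.batched(sorted(set(ids)), 1000):
        #         clauses.append(column.in_(chunk))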

        # Two IN clauses, each longer than 1k batch size, first with
        # duplicates, second has matching elements in different batches
        # (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `SqlRegistry.registerDatasetType` and
        `SqlRegistry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        differentDimensions = registry.dimensions.conform(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `SqlRegistry.insertDimensionData`,
        `SqlRegistry.syncDimensionData`, and `SqlRegistry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", dimensions=dimension.minimal_group)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, dimensions=dimension.minimal_group)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i", dimensions=dimension2.minimal_group
            )
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `SqlRegistry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `SqlRegistry.insertDatasets`,
        `SqlRegistry.getDataset`, and `SqlRegistry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `SqlRegistry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, where two have the right dataset
        # type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that SqlRegistry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that SqlRegistry.removeDatasetType raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(DatasetTypeError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `SqlRegistry._importDatasets` with UUID dataset ID."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing same DatasetRef with different dataset ID is an error
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs, they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make dataset ref with reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
                self.assertEqual(ref1.id, ref.id)
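                # The version-5 assertion above reflects that these modes
                # derive the ID deterministically as a name-based UUID from
                # the dataset type and data ID (plus the run name for
                # DATAID_TYPE_RUN), so re-importing the same ref reproduces
                # the same UUID.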

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        Components can no longer be found by registry. This test checks
        that this now fails.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        with self.assertRaises(DatasetTypeError):
            registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # Search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2. That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that `SqlRegistry.setCollectionChain` obeys its 'flatten'
        option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, dimensions=dimension.minimal_group))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, dimensions=dimension.minimal_group)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap.
        """
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r"),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit=10),
            dict(instrument="DummyCam", exposure=101, visit=10),
            dict(instrument="DummyCam", exposure=110, visit=11),
            dict(instrument="DummyCam", exposure=111, visit=11),
            dict(instrument="DummyCam", exposure=200, visit=20),
            dict(instrument="DummyCam", exposure=201, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.conform(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.conform(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = registry.dimensions.conform(
            rawType.dimensions.required.names | calexpType.dimensions.required.names
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, this is not in the dimensions, but it
        # is a part of the full expression so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test; we want
        # "band" in the test so we also have to add physical_filter
        # dimensions
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!"))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = registry.dimensions.conform(
            calexpType.dimensions.required.names
            | mergeType.dimensions.required.names
            | measType.dimensions.required.names
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("skymap", "tract", "patch", "band"))
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",))

        # Specifying non-existing skymap is an exception
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins."""
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to. We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.database_elements:
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                dimensions = element1.minimal_group | element2.minimal_group
                # Construct expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.required, **dataId2.required}, dimensions=dimensions
                    )
                    for (dataId1, region1), (dataId2, region2) in itertools.product(
                        regions[element1.name].items(), regions[element2.name].items()
                    )
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(dimensions))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, these_regions in regions.items():
            dimensions = registry.dimensions[elementName].minimal_group | commonSkyPix.minimal_group
            expected = set()
            for dataId, region in these_regions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.required}, dimensions=dimensions
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(dimensions))
            self.assertEqual(expected, queried)
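        # Note: `envelope` returns half-open [begin, end) pixel-index ranges
        # that conservatively cover each region (they may include pixels that
        # do not actually intersect it), which is why the expected set above
        # is built from range(begin, end); the registry's overlap rows are
        # expected to match that same envelope-based set exactly.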

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known
        bands.  This is tricky because band is backed by a query against
        physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [
                DataCoordinate.standardize(band="i", universe=registry.dimensions),
                DataCoordinate.standardize(band="r", universe=registry.dimensions),
            ],
        )

1198 

1199 def testAttributeManager(self): 

1200 """Test basic functionality of attribute manager.""" 

1201 # number of attributes with schema versions in a fresh database, 

1202 # 6 managers with 2 records per manager, plus config for dimensions 

1203 VERSION_COUNT = 6 * 2 + 1 

1204 

1205 registry = self.makeRegistry() 

1206 attributes = registry._managers.attributes 

1207 

1208 # check what get() returns for non-existing key 

1209 self.assertIsNone(attributes.get("attr")) 

1210 self.assertEqual(attributes.get("attr", ""), "") 

1211 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1212 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1213 

1214 # cannot store empty key or value 

1215 with self.assertRaises(ValueError): 

1216 attributes.set("", "value") 

1217 with self.assertRaises(ValueError): 

1218 attributes.set("attr", "") 

1219 

1220 # set value of non-existing key 

1221 attributes.set("attr", "value") 

1222 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1223 self.assertEqual(attributes.get("attr"), "value") 

1224 

1225 # update value of existing key 

1226 with self.assertRaises(ButlerAttributeExistsError): 

1227 attributes.set("attr", "value2") 

1228 

1229 attributes.set("attr", "value2", force=True) 

1230 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1231 self.assertEqual(attributes.get("attr"), "value2") 

1232 

1233 # delete existing key 

1234 self.assertTrue(attributes.delete("attr")) 

1235 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1236 

1237 # delete a non-existent key 

1238 self.assertFalse(attributes.delete("non-attr")) 

1239 

1240 # store a bunch of keys and get the list back 

1241 data = [ 

1242 ("version.core", "1.2.3"), 

1243 ("version.dimensions", "3.2.1"), 

1244 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1245 ] 

1246 for key, value in data: 

1247 attributes.set(key, value) 

1248 items = dict(attributes.items()) 

1249 for key, value in data: 

1250 self.assertEqual(items[key], value) 

1251 

1252 def testQueryDatasetsDeduplication(self): 

1253 """Test that the findFirst option to queryDatasets selects datasets 

1254 from collections in the order given. 

1255 """ 

1256 registry = self.makeRegistry() 

1257 self.loadData(registry, "base.yaml") 

1258 self.loadData(registry, "datasets.yaml") 

1259 self.assertCountEqual( 

1260 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1261 [ 

1262 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1263 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1264 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1265 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1266 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1267 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1268 ], 

1269 ) 

1270 self.assertCountEqual( 

1271 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1272 [ 

1273 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1274 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1275 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1276 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1277 ], 

1278 ) 
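# With the collection search order reversed, detectors 2 and 3 (present
# in both runs) should now resolve to their imported_r datasets instead.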

1279 self.assertCountEqual( 

1280 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1281 [ 

1282 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1283 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1284 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1285 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1286 ], 

1287 ) 

1288 

1289 def testQueryResults(self): 

1290 """Test querying for data IDs and then manipulating the QueryResults 

1291 object returned to perform other queries. 

1292 """ 

1293 registry = self.makeRegistry() 

1294 self.loadData(registry, "base.yaml") 

1295 self.loadData(registry, "datasets.yaml") 

1296 bias = registry.getDatasetType("bias") 

1297 flat = registry.getDatasetType("flat") 

1298 # Obtain expected results from methods other than those we're testing 

1299 # here. That includes: 

1300 # - the dimensions of the data IDs we want to query: 

1301 expected_dimensions = registry.dimensions.conform(["detector", "physical_filter"]) 

1302 # - the dimensions of some other data IDs we'll extract from that: 

1303 expected_subset_dimensions = registry.dimensions.conform(["detector"]) 

1304 # - the data IDs we expect to obtain from the first queries: 

1305 expectedDataIds = DataCoordinateSet( 

1306 { 

1307 DataCoordinate.standardize( 

1308 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1309 ) 

1310 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1311 }, 

1312 dimensions=expected_dimensions, 

1313 hasFull=False, 

1314 hasRecords=False, 

1315 ) 

1316 # - the flat datasets we expect to find from those data IDs, in just 

1317 # one collection (so deduplication is irrelevant): 

1318 expectedFlats = [ 

1319 registry.findDataset( 

1320 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1321 ), 

1322 registry.findDataset( 

1323 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1324 ), 

1325 registry.findDataset( 

1326 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1327 ), 

1328 ] 

1329 # - the data IDs we expect to extract from that: 

1330 expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions) 

1331 # - the bias datasets we expect to find from those data IDs, after we 

1332 # subset-out the physical_filter dimension, both with duplicates: 

1333 expectedAllBiases = [ 

1334 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1335 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1336 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1337 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1338 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1339 ] 

1340 # - ...and without duplicates: 

1341 expectedDeduplicatedBiases = [ 

1342 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1343 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1344 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1345 ] 

1346 # Test against those expected results, using a "lazy" query for the 

1347 # data IDs (which re-executes that query each time we use it to do 

1348 # something new). 

1349 dataIds = registry.queryDataIds( 

1350 ["detector", "physical_filter"], 

1351 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1352 instrument="Cam1", 

1353 ) 

1354 self.assertEqual(dataIds.dimensions, expected_dimensions) 

1355 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1356 self.assertCountEqual( 

1357 list( 

1358 dataIds.findDatasets( 

1359 flat, 

1360 collections=["imported_r"], 

1361 ) 

1362 ), 

1363 expectedFlats, 

1364 ) 

1365 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True) 

1366 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1367 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1368 self.assertCountEqual( 

1369 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1370 expectedAllBiases, 

1371 ) 

1372 self.assertCountEqual( 

1373 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1374 expectedDeduplicatedBiases, 

1375 ) 

1376 

1377 # Searching for a dataset with dimensions we had projected away 

1378 # restores those dimensions. 

1379 self.assertCountEqual( 

1380 list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)), 

1381 expectedFlats, 

1382 ) 

1383 

1384 # Use a named dataset type that does not exist and a dataset type 

1385 # object that does not exist. 

1386 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure") 

1387 

1388 # Test both string name and dataset type object. 

1389 test_type: str | DatasetType 

1390 for test_type, test_type_name in ( 

1391 (unknown_type, unknown_type.name), 

1392 (unknown_type.name, unknown_type.name), 

1393 ): 

1394 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name): 

1395 list( 

1396 subsetDataIds.findDatasets( 

1397 test_type, collections=["imported_r", "imported_g"], findFirst=True 

1398 ) 

1399 ) 

1400 

1401 # Materialize the bias dataset queries (only) by putting the results 

1402 # into temporary tables, then repeat those tests. 

1403 with subsetDataIds.findDatasets( 

1404 bias, collections=["imported_r", "imported_g"], findFirst=False 

1405 ).materialize() as biases: 

1406 self.assertCountEqual(list(biases), expectedAllBiases) 

1407 with subsetDataIds.findDatasets( 

1408 bias, collections=["imported_r", "imported_g"], findFirst=True 

1409 ).materialize() as biases: 

1410 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1411 # Materialize the data ID subset query, but not the dataset queries. 

1412 with subsetDataIds.materialize() as subsetDataIds: 

1413 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1414 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1415 self.assertCountEqual( 

1416 list( 

1417 subsetDataIds.findDatasets( 

1418 bias, collections=["imported_r", "imported_g"], findFirst=False 

1419 ) 

1420 ), 

1421 expectedAllBiases, 

1422 ) 

1423 self.assertCountEqual( 

1424 list( 

1425 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1426 ), 

1427 expectedDeduplicatedBiases, 

1428 ) 

1429 # Materialize the dataset queries, too. 

1430 with subsetDataIds.findDatasets( 

1431 bias, collections=["imported_r", "imported_g"], findFirst=False 

1432 ).materialize() as biases: 

1433 self.assertCountEqual(list(biases), expectedAllBiases) 

1434 with subsetDataIds.findDatasets( 

1435 bias, collections=["imported_r", "imported_g"], findFirst=True 

1436 ).materialize() as biases: 

1437 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1438 # Materialize the original query, but none of the follow-up queries. 

1439 with dataIds.materialize() as dataIds: 

1440 self.assertEqual(dataIds.dimensions, expected_dimensions) 

1441 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1442 self.assertCountEqual( 

1443 list( 

1444 dataIds.findDatasets( 

1445 flat, 

1446 collections=["imported_r"], 

1447 ) 

1448 ), 

1449 expectedFlats, 

1450 ) 

1451 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True) 

1452 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1453 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1454 self.assertCountEqual( 

1455 list( 

1456 subsetDataIds.findDatasets( 

1457 bias, collections=["imported_r", "imported_g"], findFirst=False 

1458 ) 

1459 ), 

1460 expectedAllBiases, 

1461 ) 

1462 self.assertCountEqual( 

1463 list( 

1464 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1465 ), 

1466 expectedDeduplicatedBiases, 

1467 ) 

1468 # Materialize just the bias dataset queries. 

1469 with subsetDataIds.findDatasets( 

1470 bias, collections=["imported_r", "imported_g"], findFirst=False 

1471 ).materialize() as biases: 

1472 self.assertCountEqual(list(biases), expectedAllBiases) 

1473 with subsetDataIds.findDatasets( 

1474 bias, collections=["imported_r", "imported_g"], findFirst=True 

1475 ).materialize() as biases: 

1476 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1477 # Materialize the subset data ID query, but not the dataset 

1478 # queries. 

1479 with subsetDataIds.materialize() as subsetDataIds: 

1480 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions) 

1481 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1482 self.assertCountEqual( 

1483 list( 

1484 subsetDataIds.findDatasets( 

1485 bias, collections=["imported_r", "imported_g"], findFirst=False 

1486 ) 

1487 ), 

1488 expectedAllBiases, 

1489 ) 

1490 self.assertCountEqual( 

1491 list( 

1492 subsetDataIds.findDatasets( 

1493 bias, collections=["imported_r", "imported_g"], findFirst=True 

1494 ) 

1495 ), 

1496 expectedDeduplicatedBiases, 

1497 ) 

1498 # Materialize the bias dataset queries, too, so now we're 

1499 # materializing every single step. 

1500 with subsetDataIds.findDatasets( 

1501 bias, collections=["imported_r", "imported_g"], findFirst=False 

1502 ).materialize() as biases: 

1503 self.assertCountEqual(list(biases), expectedAllBiases) 

1504 with subsetDataIds.findDatasets( 

1505 bias, collections=["imported_r", "imported_g"], findFirst=True 

1506 ).materialize() as biases: 

1507 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1508 

1509 def testStorageClassPropagation(self): 

1510 """Test that queries for datasets respect the storage class passed in 

1511 as part of a full dataset type. 

1512 """ 

1513 registry = self.makeRegistry() 

1514 self.loadData(registry, "base.yaml") 

1515 dataset_type_in_registry = DatasetType( 

1516 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions 

1517 ) 

1518 registry.registerDatasetType(dataset_type_in_registry) 

1519 run = "run1" 

1520 registry.registerRun(run) 

1521 (inserted_ref,) = registry.insertDatasets( 

1522 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run 

1523 ) 

1524 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry) 

1525 query_dataset_type = DatasetType( 

1526 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions 

1527 ) 

1528 self.assertNotEqual(dataset_type_in_registry, query_dataset_type) 
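# Querying with a dataset type whose storage class differs from the
# registered one should propagate the override: every ref returned below
# must carry StructuredDataDict rather than the registered Packages.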

1529 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run]) 

1530 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore 

1531 (query_datasets_ref,) = query_datasets_result 

1532 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type) 

1533 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets( 

1534 query_dataset_type, collections=[run] 

1535 ) 

1536 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type) 

1537 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result 

1538 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type) 

1539 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type) 

1540 self.assertEqual(list(query_dataset_types_result), [query_dataset_type]) 

1541 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run]) 

1542 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type) 

1543 

1544 def testEmptyDimensionsQueries(self): 

1545 """Test Query and QueryResults objects in the case where there are no 

1546 dimensions. 

1547 """ 

1548 # Set up test data: one dataset type, two runs, one dataset in each. 

1549 registry = self.makeRegistry() 

1550 self.loadData(registry, "base.yaml") 

1551 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1552 registry.registerDatasetType(schema) 

1553 dataId = DataCoordinate.make_empty(registry.dimensions) 

1554 run1 = "run1" 

1555 run2 = "run2" 

1556 registry.registerRun(run1) 

1557 registry.registerRun(run2) 

1558 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1559 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1560 # Query directly for both of the datasets together, then for each one individually. 

1561 self.checkQueryResults( 

1562 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1563 ) 

1564 self.checkQueryResults( 

1565 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1566 [dataset1], 

1567 ) 

1568 self.checkQueryResults( 

1569 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1570 [dataset2], 

1571 ) 

1572 # Query for data IDs with no dimensions. 

1573 dataIds = registry.queryDataIds([]) 

1574 self.checkQueryResults(dataIds, [dataId]) 

1575 # Use queried data IDs to find the datasets. 

1576 self.checkQueryResults( 

1577 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1578 [dataset1, dataset2], 

1579 ) 

1580 self.checkQueryResults( 

1581 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1582 [dataset1], 

1583 ) 

1584 self.checkQueryResults( 

1585 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1586 [dataset2], 

1587 ) 

1588 # Now materialize the data ID query results and repeat those tests. 

1589 with dataIds.materialize() as dataIds: 

1590 self.checkQueryResults(dataIds, [dataId]) 

1591 self.checkQueryResults( 

1592 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1593 [dataset1], 

1594 ) 

1595 self.checkQueryResults( 

1596 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1597 [dataset2], 

1598 ) 

1599 # Query for non-empty data IDs, then subset that to get the empty one. 

1600 # Repeat the above tests starting from that. 

1601 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1602 self.checkQueryResults(dataIds, [dataId]) 

1603 self.checkQueryResults( 

1604 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1605 [dataset1, dataset2], 

1606 ) 

1607 self.checkQueryResults( 

1608 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1609 [dataset1], 

1610 ) 

1611 self.checkQueryResults( 

1612 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1613 [dataset2], 

1614 ) 

1615 with dataIds.materialize() as dataIds: 

1616 self.checkQueryResults(dataIds, [dataId]) 

1617 self.checkQueryResults( 

1618 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1619 [dataset1, dataset2], 

1620 ) 

1621 self.checkQueryResults( 

1622 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1623 [dataset1], 

1624 ) 

1625 self.checkQueryResults( 

1626 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1627 [dataset2], 

1628 ) 

1629 # Query for non-empty data IDs, then materialize, then subset to get 

1630 # the empty one. Repeat again. 

1631 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1632 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1633 self.checkQueryResults(dataIds, [dataId]) 

1634 self.checkQueryResults( 

1635 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1636 [dataset1, dataset2], 

1637 ) 

1638 self.checkQueryResults( 

1639 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1640 [dataset1], 

1641 ) 

1642 self.checkQueryResults( 

1643 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1644 [dataset2], 

1645 ) 

1646 with dataIds.materialize() as dataIds: 

1647 self.checkQueryResults(dataIds, [dataId]) 

1648 self.checkQueryResults( 

1649 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1650 [dataset1, dataset2], 

1651 ) 

1652 self.checkQueryResults( 

1653 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1654 [dataset1], 

1655 ) 

1656 self.checkQueryResults( 

1657 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1658 [dataset2], 

1659 ) 

1660 # Repeat the materialization tests with a dimension element that isn't 

1661 # cached, so there's no way to know when building the query whether 

1662 # there are any rows (there aren't). 

1663 dataIds = registry.queryDataIds(["exposure"]).subset(registry.dimensions.empty, unique=True) 

1664 with dataIds.materialize() as dataIds: 

1665 self.checkQueryResults(dataIds, []) 

1666 self.checkQueryResults( 

1667 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), [] 

1668 ) 

1669 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), []) 

1670 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), []) 

1671 # Query for non-empty data IDs with a constraint on an empty-data-ID 

1672 # dataset that exists. 

1673 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...) 

1674 self.checkQueryResults( 

1675 dataIds.subset(unique=True), 

1676 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)], 

1677 ) 

1678 # Query again for non-empty data IDs with a constraint on empty-data-ID 

1679 # datasets, but now when those datasets don't exist. We delete the 

1680 # existing dataset and query just that collection rather than creating 

1681 # a new empty collection, because this is a bit less likely for our 

1682 # build-time logic to shortcut-out (via the collection summaries), and 

1683 # such a shortcut would make this test more trivial than we'd like. 

1684 registry.removeDatasets([dataset2]) 

1685 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2) 

1686 self.checkQueryResults(dataIds, []) 

1687 

1688 def testDimensionDataModifications(self): 

1689 """Test that modifying dimension records via: 

1690 syncDimensionData(..., update=True) and 

1691 insertDimensionData(..., replace=True) works as expected, even in the 

1692 presence of datasets using those dimensions and spatial overlap 

1693 relationships. 

1694 """ 

1695 

1696 def _unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1697 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1698 for begin, end in ranges: 

1699 yield from range(begin, end) 

1700 

1701 def _range_set_hull( 

1702 ranges: lsst.sphgeom.RangeSet, 

1703 pixelization: lsst.sphgeom.HtmPixelization, 

1704 ) -> lsst.sphgeom.ConvexPolygon: 

1705 """Create a ConvexPolygon hull of the region defined by a set of 

1706 HTM pixelization index ranges. 

1707 """ 

1708 points = [] 

1709 for index in _unpack_range_set(ranges): 

1710 points.extend(pixelization.triangle(index).getVertices()) 

1711 return lsst.sphgeom.ConvexPolygon(points) 
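# Illustrative example: for ranges covering trixels {i, i+1}, the hull is
# the ConvexPolygon over the vertices of both triangles, so it contains
# (at least) the union of those trixels.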

1712 

1713 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1714 # and four child regions (the trixels within the parent at the next 

1715 # level). We'll use the parent as a tract/visit region and the children 

1716 # as its patch/visit_detector regions. 

1717 registry = self.makeRegistry() 

1718 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1719 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1720 index = 12288 

1721 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 
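# In HTM indexing the four children of trixel i at the next level are
# 4*i .. 4*i + 3, so scaling the single-trixel RangeSet [i, i + 1) by 4
# yields exactly the half-open child-index range [4*i, 4*i + 4).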

1722 assert htm6.universe().contains(child_ranges_small) 

1723 child_regions_small = [htm6.triangle(i) for i in _unpack_range_set(child_ranges_small)] 

1724 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1725 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1726 ) 

1727 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1728 # Make a larger version of each child region, defined to be the set of 

1729 # htm6 trixels that overlap the original's bounding circle. Make a new 

1730 # parent that's the convex hull of the new children. 

1731 child_regions_large = [ 

1732 _range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1733 ] 

1734 assert all( 

1735 large.contains(small) 

1736 for large, small in zip(child_regions_large, child_regions_small, strict=True) 

1737 ) 

1738 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1739 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1740 ) 

1741 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1742 assert parent_region_large.contains(parent_region_small) 

1743 assert not parent_region_small.contains(parent_region_large) 

1744 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1745 # Find some commonSkyPix indices that overlap the large regions but not 

1746 # overlap the small regions. We use commonSkyPix here to make sure the 

1747 # real tests later involve what's in the database, not just post-query 

1748 # filtering of regions. 

1749 child_difference_indices = [] 

1750 for large, small in zip(child_regions_large, child_regions_small, strict=True): 

1751 difference = list(_unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1752 assert difference, "if this is empty, we can't test anything useful with these regions" 

1753 assert all( 

1754 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1755 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1756 for d in difference 

1757 ) 

1758 child_difference_indices.append(difference) 

1759 parent_difference_indices = list( 

1760 _unpack_range_set( 

1761 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1762 ) 

1763 ) 

1764 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1765 assert all( 

1766 ( 

1767 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1768 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1769 ) 

1770 for d in parent_difference_indices 

1771 ) 

1772 # Now that we've finally got those regions, we'll insert the large ones 

1773 # as tract/patch dimension records. 

1774 skymap_name = "testing_v1" 

1775 registry.insertDimensionData( 

1776 "skymap", 

1777 { 

1778 "name": skymap_name, 

1779 "hash": bytes([42]), 

1780 "tract_max": 1, 

1781 "patch_nx_max": 2, 

1782 "patch_ny_max": 2, 

1783 }, 

1784 ) 

1785 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

1786 registry.insertDimensionData( 

1787 "patch", 

1788 *[ 

1789 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1790 for n, c in enumerate(child_regions_large) 

1791 ], 

1792 ) 

1793 # Add a dataset that uses these dimensions to make sure that modifying 

1794 # them doesn't disrupt foreign keys (need to make sure DB doesn't 

1795 # implement insert with replace=True as delete-then-insert). 

1796 dataset_type = DatasetType( 

1797 "coadd", 

1798 dimensions=["tract", "patch"], 

1799 universe=registry.dimensions, 

1800 storageClass="Exposure", 

1801 ) 

1802 registry.registerDatasetType(dataset_type) 

1803 registry.registerCollection("the_run", CollectionType.RUN) 

1804 registry.insertDatasets( 

1805 dataset_type, 

1806 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

1807 run="the_run", 

1808 ) 

1809 # Query for tracts and patches that overlap some "difference" 

1810 # commonSkyPix pixels; there should be overlaps, because the database 

1811 # has the "large" suite of regions. 

1812 self.assertEqual( 

1813 {0}, 

1814 { 

1815 data_id["tract"] 

1816 for data_id in registry.queryDataIds( 

1817 ["tract"], 

1818 skymap=skymap_name, 

1819 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1820 ) 

1821 }, 

1822 ) 

1823 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1824 self.assertIn( 

1825 patch_id, 

1826 { 

1827 data_id["patch"] 

1828 for data_id in registry.queryDataIds( 

1829 ["patch"], 

1830 skymap=skymap_name, 

1831 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1832 ) 

1833 }, 

1834 ) 

1835 # Use sync to update the tract region and insert to update the regions 

1836 # of the patches, to the "small" suite. 

1837 updated = registry.syncDimensionData( 

1838 "tract", 

1839 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

1840 update=True, 

1841 ) 

1842 self.assertEqual(updated, {"region": parent_region_large}) 

1843 registry.insertDimensionData( 

1844 "patch", 

1845 *[ 

1846 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1847 for n, c in enumerate(child_regions_small) 

1848 ], 

1849 replace=True, 

1850 ) 

1851 # Query again; there now should be no such overlaps, because the 

1852 # database has the "small" suite of regions. 

1853 self.assertFalse( 

1854 set( 

1855 registry.queryDataIds( 

1856 ["tract"], 

1857 skymap=skymap_name, 

1858 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1859 ) 

1860 ) 

1861 ) 

1862 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1863 self.assertNotIn( 

1864 patch_id, 

1865 { 

1866 data_id["patch"] 

1867 for data_id in registry.queryDataIds( 

1868 ["patch"], 

1869 skymap=skymap_name, 

1870 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1871 ) 

1872 }, 

1873 ) 

1874 # Update back to the large regions and query one more time. 

1875 updated = registry.syncDimensionData( 

1876 "tract", 

1877 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

1878 update=True, 

1879 ) 

1880 self.assertEqual(updated, {"region": parent_region_small}) 

1881 registry.insertDimensionData( 

1882 "patch", 

1883 *[ 

1884 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1885 for n, c in enumerate(child_regions_large) 

1886 ], 

1887 replace=True, 

1888 ) 

1889 self.assertEqual( 

1890 {0}, 

1891 { 

1892 data_id["tract"] 

1893 for data_id in registry.queryDataIds( 

1894 ["tract"], 

1895 skymap=skymap_name, 

1896 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1897 ) 

1898 }, 

1899 ) 

1900 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1901 self.assertIn( 

1902 patch_id, 

1903 { 

1904 data_id["patch"] 

1905 for data_id in registry.queryDataIds( 

1906 ["patch"], 

1907 skymap=skymap_name, 

1908 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1909 ) 

1910 }, 

1911 ) 

1912 

1913 def testCalibrationCollections(self): 

1914 """Test operations on `~CollectionType.CALIBRATION` collections, 

1915 including `SqlRegistry.certify`, `SqlRegistry.decertify`, 

1916 `SqlRegistry.findDataset`, and 

1917 `DataCoordinateQueryResults.findRelatedDatasets`. 

1918 """ 

1919 # Setup - make a Registry, fill it with some datasets in 

1920 # non-calibration collections. 

1921 registry = self.makeRegistry() 

1922 self.loadData(registry, "base.yaml") 

1923 self.loadData(registry, "datasets.yaml") 

1924 # Set up some timestamps. 

1925 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

1926 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

1927 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

1928 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

1929 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

1930 allTimespans = [ 

1931 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

1932 ] 

1933 # Insert some exposure records with timespans between each sequential 

1934 # pair of those. 

1935 registry.insertDimensionData( 

1936 "exposure", 

1937 { 

1938 "instrument": "Cam1", 

1939 "id": 0, 

1940 "obs_id": "zero", 

1941 "physical_filter": "Cam1-G", 

1942 "timespan": Timespan(t1, t2), 

1943 }, 

1944 { 

1945 "instrument": "Cam1", 

1946 "id": 1, 

1947 "obs_id": "one", 

1948 "physical_filter": "Cam1-G", 

1949 "timespan": Timespan(t2, t3), 

1950 }, 

1951 { 

1952 "instrument": "Cam1", 

1953 "id": 2, 

1954 "obs_id": "two", 

1955 "physical_filter": "Cam1-G", 

1956 "timespan": Timespan(t3, t4), 

1957 }, 

1958 { 

1959 "instrument": "Cam1", 

1960 "id": 3, 

1961 "obs_id": "three", 

1962 "physical_filter": "Cam1-G", 

1963 "timespan": Timespan(t4, t5), 

1964 }, 

1965 ) 

1966 # Get references to some datasets. 

1967 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

1968 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

1969 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

1970 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

1971 # Register the main calibration collection we'll be working with. 

1972 collection = "Cam1/calibs/default" 

1973 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

1974 # Cannot associate into a calibration collection (no timespan). 

1975 with self.assertRaises(CollectionTypeError): 

1976 registry.associate(collection, [bias2a]) 

1977 # Certify 2a dataset with [t2, t4) validity. 

1978 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

1979 # Test that we can query for this dataset via the new collection, both 

1980 # on its own and with a RUN collection. 

1981 self.assertEqual( 

1982 set(registry.queryDatasets("bias", findFirst=False, collections=collection)), 

1983 {bias2a}, 

1984 ) 

1985 self.assertEqual( 

1986 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])), 

1987 { 

1988 bias2a, 

1989 bias2b, 

1990 bias3b, 

1991 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1992 }, 

1993 ) 

1994 self.assertEqual( 

1995 set(registry.queryDataIds("detector", datasets="bias", collections=collection)), 

1996 {registry.expandDataId(instrument="Cam1", detector=2)}, 

1997 ) 

1998 self.assertEqual( 

1999 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])), 

2000 { 

2001 registry.expandDataId(instrument="Cam1", detector=2), 

2002 registry.expandDataId(instrument="Cam1", detector=3), 

2003 registry.expandDataId(instrument="Cam1", detector=4), 

2004 }, 

2005 ) 

2006 self.assertEqual( 

2007 set( 

2008 registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets( 

2009 "bias", findFirst=True, collections=[collection] 

2010 ) 

2011 ), 

2012 { 

2013 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a), 

2014 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a), 

2015 }, 

2016 ) 

2017 self.assertEqual( 

2018 set( 

2019 registry.queryDataIds( 

2020 ["exposure", "detector"], instrument="Cam1", detector=2 

2021 ).findRelatedDatasets("bias", findFirst=True, collections=[collection, "imported_r"]) 

2022 ), 

2023 { 

2024 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a), 

2025 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a), 

2026 (registry.expandDataId(instrument="Cam1", detector=2, exposure=0), bias2b), 

2027 (registry.expandDataId(instrument="Cam1", detector=2, exposure=3), bias2b), 

2028 }, 

2029 ) 

2030 

2031 # We should not be able to certify 2b with anything overlapping that 

2032 # window. 

2033 with self.assertRaises(ConflictingDefinitionError): 

2034 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

2035 with self.assertRaises(ConflictingDefinitionError): 

2036 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

2037 with self.assertRaises(ConflictingDefinitionError): 

2038 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

2039 with self.assertRaises(ConflictingDefinitionError): 

2040 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

2041 with self.assertRaises(ConflictingDefinitionError): 

2042 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

2043 with self.assertRaises(ConflictingDefinitionError): 

2044 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

2045 with self.assertRaises(ConflictingDefinitionError): 

2046 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

2047 with self.assertRaises(ConflictingDefinitionError): 

2048 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

2049 # We should be able to certify 3a with a range overlapping that window, 

2050 # because it's for a different detector. 

2051 # We'll certify 3a over [t1, t3). 

2052 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

2053 # Now we'll certify 2b and 3b together over [t4, ∞). 

2054 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 
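# Validity layout at this point:
#   detector 2: bias2a over [t2, t4), bias2b over [t4, ∞)
#   detector 3: bias3a over [t1, t3), bias3b over [t4, ∞)
# so detector 3 has no calibration valid during [t3, t4).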

2055 

2056 # Fetch all associations and check that they are what we expect. 

2057 self.assertCountEqual( 

2058 list( 

2059 registry.queryDatasetAssociations( 

2060 "bias", 

2061 collections=[collection, "imported_g", "imported_r"], 

2062 ) 

2063 ), 

2064 [ 

2065 DatasetAssociation( 

2066 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

2067 collection="imported_g", 

2068 timespan=None, 

2069 ), 

2070 DatasetAssociation( 

2071 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2072 collection="imported_r", 

2073 timespan=None, 

2074 ), 

2075 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

2076 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

2077 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

2078 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

2079 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

2080 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

2081 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2082 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2083 ], 

2084 ) 

2085 

2086 class Ambiguous: 

2087 """Tag class to denote lookups that should be ambiguous.""" 

2088 

2089 pass 

2090 

2091 def _assertLookup( 

2092 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None 

2093 ) -> None: 

2094 """Local function that asserts that a bias lookup returns the given 

2095 expected result. 

2096 """ 

2097 if expected is Ambiguous: 

2098 with self.assertRaises((DatasetTypeError, LookupError)): 

2099 registry.findDataset( 

2100 "bias", 

2101 collections=collection, 

2102 instrument="Cam1", 

2103 detector=detector, 

2104 timespan=timespan, 

2105 ) 

2106 else: 

2107 self.assertEqual( 

2108 expected, 

2109 registry.findDataset( 

2110 "bias", 

2111 collections=collection, 

2112 instrument="Cam1", 

2113 detector=detector, 

2114 timespan=timespan, 

2115 ), 

2116 ) 

2117 

2118 # Systematically test lookups against expected results. 

2119 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2120 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2121 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2122 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2123 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

2124 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2125 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2126 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2127 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2128 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

2129 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2130 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2131 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2132 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

2133 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2134 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

2135 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

2136 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

2137 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

2138 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2139 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2140 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2141 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2142 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2143 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2144 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

2145 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2146 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2147 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2148 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2149 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

2150 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2151 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2152 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2153 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2154 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2155 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2156 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2157 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2158 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2159 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2160 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2161 

2162 # Test lookups via temporal joins to exposures. 

2163 self.assertEqual( 

2164 set( 

2165 registry.queryDataIds( 

2166 ["exposure", "detector"], instrument="Cam1", detector=2 

2167 ).findRelatedDatasets("bias", collections=[collection]) 

2168 ), 

2169 { 

2170 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a), 

2171 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a), 

2172 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b), 

2173 }, 

2174 ) 

2175 self.assertEqual( 

2176 set( 

2177 registry.queryDataIds( 

2178 ["exposure", "detector"], instrument="Cam1", detector=3 

2179 ).findRelatedDatasets("bias", collections=[collection]) 

2180 ), 

2181 { 

2182 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a), 

2183 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a), 

2184 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b), 

2185 }, 

2186 ) 

2187 self.assertEqual( 

2188 set( 

2189 registry.queryDataIds( 

2190 ["exposure", "detector"], instrument="Cam1", detector=2 

2191 ).findRelatedDatasets("bias", collections=[collection, "imported_g"]) 

2192 ), 

2193 { 

2194 (registry.expandDataId(instrument="Cam1", exposure=0, detector=2), bias2a), 

2195 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a), 

2196 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a), 

2197 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b), 

2198 }, 

2199 ) 

2200 self.assertEqual( 

2201 set( 

2202 registry.queryDataIds( 

2203 ["exposure", "detector"], instrument="Cam1", detector=3 

2204 ).findRelatedDatasets("bias", collections=[collection, "imported_g"]) 

2205 ), 

2206 { 

2207 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a), 

2208 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a), 

2209 (registry.expandDataId(instrument="Cam1", exposure=2, detector=3), bias3a), 

2210 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b), 

2211 }, 

2212 ) 

2213 

2214 # Decertify [t3, t5) for all data IDs, then run the test lookups again. 

2215 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2216 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2217 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2218 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2219 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2220 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2221 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2222 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2223 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2224 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2225 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2226 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2227 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2228 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2229 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2230 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2231 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2232 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2233 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2234 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2235 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2236 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2237 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2238 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2239 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2240 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2241 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2242 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2243 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2244 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2245 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2246 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2247 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2248 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2249 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2250 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2251 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2252 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2253 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2254 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2255 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2256 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2257 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2258 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2259 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2260 

2261 # Decertify everything, this time with explicit data IDs, then check 

2262 # that no lookups succeed. 

2263 registry.decertify( 

2264 collection, 

2265 "bias", 

2266 Timespan(None, None), 

2267 dataIds=[ 

2268 dict(instrument="Cam1", detector=2), 

2269 dict(instrument="Cam1", detector=3), 

2270 ], 

2271 ) 

2272 for detector in (2, 3): 

2273 for timespan in allTimespans: 

2274 _assertLookup(detector=detector, timespan=timespan, expected=None) 

2275 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2276 # those. 

2277 registry.certify( 

2278 collection, 

2279 [bias2a, bias3a], 

2280 Timespan(None, None), 

2281 ) 

2282 for timespan in allTimespans: 

2283 _assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2284 _assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2285 # Decertify just bias2 over [t2, t4). 

2286 # This should split a single certification row into two (and leave the 

2287 # other existing row, for bias3a, alone). 

2288 registry.decertify( 

2289 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2290 ) 

2291 for timespan in allTimespans: 

2292 _assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2293 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2294 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2295 if overlapsBefore and overlapsAfter: 

2296 expected = Ambiguous 

2297 elif overlapsBefore or overlapsAfter: 

2298 expected = bias2a 

2299 else: 

2300 expected = None 

2301 _assertLookup(detector=2, timespan=timespan, expected=expected) 

2302 

2303 def testSkipCalibs(self): 

2304 """Test how queries handle skipping of calibration collections.""" 

2305 registry = self.makeRegistry() 

2306 self.loadData(registry, "base.yaml") 

2307 self.loadData(registry, "datasets.yaml") 

2308 

2309 coll_calib = "Cam1/calibs/default" 

2310 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2311 

2312 # Add all biases to the calibration collection. 

2313 # Without this, the logic that prunes dataset subqueries based on 

2314 # datasetType-collection summary information will fire before the logic 

2315 # we want to test below. This is a good thing (it avoids the dreaded 

2316 # NotImplementedError a bit more often) everywhere but here. 

2317 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2318 

2319 coll_list = [coll_calib, "imported_g", "imported_r"] 

2320 chain = "Cam1/chain" 

2321 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2322 registry.setCollectionChain(chain, coll_list) 
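# Chained collections and pattern-based searches below are allowed to
# skip the calibration collection silently; only the explicit list is
# expected to raise.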

2323 

2324 # explicit list will raise if findFirst=True or there are temporal 

2325 # dimensions 

2326 with self.assertRaises(NotImplementedError): 

2327 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2328 with self.assertRaises(NotImplementedError): 

2329 registry.queryDataIds( 

2330 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2331 ).count() 

2332 

2333 # chain will skip 

2334 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2335 self.assertGreater(len(datasets), 0) 

2336 

2337 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2338 self.assertGreater(len(dataIds), 0) 

2339 

2340 # glob will skip too 

2341 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2342 self.assertGreater(len(datasets), 0) 

2343 

2344 # regular expression will skip too 

2345 pattern = re.compile(".*") 

2346 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2347 self.assertGreater(len(datasets), 0) 

2348 

2349 # ellipsis should work as usual 

2350 datasets = list(registry.queryDatasets("bias", collections=...)) 

2351 self.assertGreater(len(datasets), 0) 

2352 

2353 # a few tests with findFirst 

2354 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2355 self.assertGreater(len(datasets), 0) 

2356 

2357 def testIngestTimeQuery(self): 

2358 registry = self.makeRegistry() 

2359 self.loadData(registry, "base.yaml") 

2360 dt0 = datetime.datetime.now(datetime.UTC) 

2361 self.loadData(registry, "datasets.yaml") 

2362 dt1 = datetime.datetime.now(datetime.UTC) 

2363 

2364 datasets = list(registry.queryDatasets(..., collections=...)) 

2365 len0 = len(datasets) 

2366 self.assertGreater(len0, 0) 

2367 

2368 where = "ingest_date > T'2000-01-01'" 

2369 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2370 len1 = len(datasets) 

2371 self.assertEqual(len0, len1) 

2372 

2373 # no one will ever use this piece of software in 30 years 

2374 where = "ingest_date > T'2050-01-01'" 

2375 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2376 len2 = len(datasets) 

2377 self.assertEqual(len2, 0) 

2378 

2379 # Check more exact timing to make sure there is no 37-second offset 

2380 # (after fixing DM-30124). SQLite time precision is 1 second, so make 

2381 # sure that we don't test with higher precision. 

2382 tests = [ 

2383 # format: (timestamp, operator, expected_len) 

2384 (dt0 - timedelta(seconds=1), ">", len0), 

2385 (dt0 - timedelta(seconds=1), "<", 0), 

2386 (dt1 + timedelta(seconds=1), "<", len0), 

2387 (dt1 + timedelta(seconds=1), ">", 0), 

2388 ] 
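# dt0 was taken before datasets.yaml was ingested and dt1 after, so every
# dataset's ingest_date falls between them; widening each bound by one
# second keeps the comparisons safe at SQLite's one-second precision.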

2389 for dt, op, expect_len in tests: 

2390 dt_str = dt.isoformat(sep=" ") 

2391 

2392 where = f"ingest_date {op} T'{dt_str}'" 

2393 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2394 self.assertEqual(len(datasets), expect_len) 

2395 

2396 # same with bind using datetime or astropy Time 

2397 where = f"ingest_date {op} ingest_time" 

2398 datasets = list( 

2399 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2400 ) 

2401 self.assertEqual(len(datasets), expect_len) 

2402 

2403 dt_astropy = astropy.time.Time(dt, format="datetime") 

2404 datasets = list( 

2405 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2406 ) 

2407 self.assertEqual(len(datasets), expect_len) 

2408 

2409 def testTimespanQueries(self): 

2410 """Test query expressions involving timespans.""" 

2411 registry = self.makeRegistry() 

2412 self.loadData(registry, "hsc-rc2-subset.yaml") 

2413 # All visits in the database; mapping from ID to timespan. 

2414 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2415 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2416 # visit IDs are monotonically increasing). 

2417 ids = sorted(visits.keys()) 

2418 self.assertGreater(len(ids), 20) 

2419 # Pick some quasi-random indexes into `ids` to play with. 

2420 i1 = int(len(ids) * 0.1) 

2421 i2 = int(len(ids) * 0.3) 

2422 i3 = int(len(ids) * 0.6) 

2423 i4 = int(len(ids) * 0.8) 

2424 # Extract some times from those: just before the beginning of i1 (which 

2425 # should be after the end of the previous visit), exactly the 

2426 # beginning of i2, just after the beginning of i3 (and before its end), 

2427 # and the exact end of i4. 

2428 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2429 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2430 t2 = visits[ids[i2]].begin 

2431 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2432 self.assertLess(t3, visits[ids[i3]].end) 

2433 t4 = visits[ids[i4]].end 

2434 # Make sure those are actually in order. 

2435 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2436 

2437 bind = { 

2438 "t1": t1, 

2439 "t2": t2, 

2440 "t3": t3, 

2441 "t4": t4, 

2442 "ts23": Timespan(t2, t3), 

2443 } 

2444 

2445 def query(where): 

2446 """Return results as a sorted, deduplicated list of visit IDs. 

2447 

2448 Parameters 

2449 ---------- 

2450 where : `str` 

2451 The WHERE clause for the query. 

2452 """ 

2453 return sorted( 

2454 { 

2455 dataId["visit"] 

2456 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2457 } 

2458 ) 
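# For example, query("visit.timespan OVERLAPS ts23") returns the sorted,
# deduplicated visit IDs whose timespans intersect the half-open
# interval [t2, t3) bound above.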

2459 

2460 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2461 # where they appear in the expression, and how we get the timespan into 

2462 # the expression. 

2463 

2464 # t1 is before the start of i1, so this should not include i1. 

2465 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2466 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2467 # should not include i2. 

2468 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2469 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2470 # t3 is in the middle of i3, so this should include i3. 

2471 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2472 # This one should not include i3, by the same reasoning. 

2473 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2474 # t4 is exactly at the end of i4, so this should include i4. 

2475 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2476 # i4's upper bound of t4 is exclusive, so this should not include i4. 

2477 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2478 

2479 # Now some timespan vs. time scalar queries. 

2480 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2481 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2482 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2483 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2484 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2485 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2486 

2487 # Empty timespans should not overlap anything. 

2488 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 

2489 
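
# A minimal illustrative sketch (not from the original suite) of the 

# half-open [begin, end) semantics the OVERLAPS assertions above rely on; 

# the times used are arbitrary. 

def testTimespanExclusiveEnds(self): 

"""Sketch: Timespan intervals are half-open, so timespans that merely 

touch at an endpoint do not overlap. 

""" 

t0 = astropy.time.Time("2020-01-01T00:00:00", format="isot", scale="tai") 

t1 = t0 + astropy.time.TimeDelta(60.0, format="sec") 

t2 = t1 + astropy.time.TimeDelta(60.0, format="sec") 

# Shared interior: overlaps. 

self.assertTrue(Timespan(t0, t1).overlaps(Timespan(t0, t2))) 

# Touching only at t1: no overlap, because ends are exclusive. 

self.assertFalse(Timespan(t0, t1).overlaps(Timespan(t1, t2))) 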

2490 def testCollectionSummaries(self): 

2491 """Test recording and retrieval of collection summaries.""" 

2492 self.maxDiff = None 

2493 registry = self.makeRegistry() 

2494 # Importing datasets from yaml should go through the code path where 

2495 # we update collection summaries as we insert datasets. 

2496 self.loadData(registry, "base.yaml") 

2497 self.loadData(registry, "datasets.yaml") 

2498 flat = registry.getDatasetType("flat") 

2499 expected1 = CollectionSummary() 

2500 expected1.dataset_types.add(registry.getDatasetType("bias")) 

2501 expected1.add_data_ids( 

2502 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)] 

2503 ) 

2504 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2505 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2506 # Create a chained collection with both of the imported runs; the 

2507 # summary should be the same, because it's a union with itself. 

2508 chain = "chain" 

2509 registry.registerCollection(chain, CollectionType.CHAINED) 

2510 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2511 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2512 # Associate flats only into a tagged collection and a calibration 

2513 # collection to check summaries of those. 

2514 tag = "tag" 

2515 registry.registerCollection(tag, CollectionType.TAGGED) 

2516 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2517 calibs = "calibs" 

2518 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2519 registry.certify( 

2520 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2521 ) 

2522 expected2 = expected1.copy() 

2523 expected2.dataset_types.discard("bias") 

2524 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2525 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2526 # Explicitly calling SqlRegistry.refresh() should load those same 

2527 # summaries, via a totally different code path. 

2528 registry.refresh() 

2529 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2530 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2531 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2532 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2533 

2534 def testBindInQueryDatasets(self): 

2535 """Test that the bind parameter is correctly forwarded in 

2536 queryDatasets recursion. 

2537 """ 

2538 registry = self.makeRegistry() 

2539 # Load some datasets so the equivalent direct and bound queries 

2540 # below have something to match. 

2541 self.loadData(registry, "base.yaml") 

2542 self.loadData(registry, "datasets.yaml") 

2543 self.assertEqual( 

2544 set(registry.queryDatasets("flat", band="r", collections=...)), 

2545 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2546 ) 

2547 

2548 def testQueryIntRangeExpressions(self): 

2549 """Test integer range expressions in ``where`` arguments. 

2550 

2551 Note that our expressions use inclusive stop values, unlike Python's. 

2552 """ 

2553 registry = self.makeRegistry() 

2554 self.loadData(registry, "base.yaml") 

2555 self.assertEqual( 

2556 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")), 

2557 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]}, 

2558 ) 

2559 self.assertEqual( 

2560 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")), 

2561 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]}, 

2562 ) 

2563 self.assertEqual( 

2564 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")), 

2565 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]}, 

2566 ) 

2567 
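
# A small sketch (added for illustration) mapping the inclusive-stop range 

# syntax above onto Python's exclusive-stop range: "detector IN (1..4:2)" 

# matches the same detectors as range(1, 4 + 1, 2). 

def testQueryIntRangeInclusiveStop(self): 

"""Sketch: 'start..stop:step' uses an inclusive stop, unlike range().""" 

registry = self.makeRegistry() 

self.loadData(registry, "base.yaml") 

self.assertEqual( 

set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")), 

{registry.expandDataId(instrument="Cam1", detector=n) for n in range(1, 4 + 1, 2)}, 

) 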

2568 def testQueryResultSummaries(self): 

2569 """Test summary methods like `count`, `any`, and `explain_no_results` 

2570 on `DataCoordinateQueryResults` and `DatasetQueryResults`. 

2571 """ 

2572 registry = self.makeRegistry() 

2573 self.loadData(registry, "base.yaml") 

2574 self.loadData(registry, "datasets.yaml") 

2575 self.loadData(registry, "spatial.yaml") 

2576 # Default test dataset has two collections, each with both flats and 

2577 # biases. Add a new collection with only biases. 

2578 registry.registerCollection("biases", CollectionType.TAGGED) 

2579 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2580 # First query yields two results, and involves no postprocessing. 

2581 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2582 self.assertTrue(query1.any(execute=False, exact=False)) 

2583 self.assertTrue(query1.any(execute=True, exact=False)) 

2584 self.assertTrue(query1.any(execute=True, exact=True)) 

2585 self.assertEqual(query1.count(exact=False), 2) 

2586 self.assertEqual(query1.count(exact=True), 2) 

2587 self.assertFalse(list(query1.explain_no_results())) 

2588 # Second query should yield no results, which we should see when 

2589 # we attempt to expand the data ID. 

2590 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2591 # There's no execute=False, exact=False test here because the behavior 

2592 # is not something we want to guarantee in this case (and exact=False 

2593 # says either answer is legal). 

2594 self.assertFalse(query2.any(execute=True, exact=False)) 

2595 self.assertFalse(query2.any(execute=True, exact=True)) 

2596 self.assertEqual(query2.count(exact=False), 0) 

2597 self.assertEqual(query2.count(exact=True), 0) 

2598 self.assertTrue(list(query2.explain_no_results())) 

2599 # These queries yield no results due to various problems that can be 

2600 # spotted prior to execution, yielding helpful diagnostics. 

2601 base_query = registry.queryDataIds(["detector", "physical_filter"]) 

2602 queries_and_snippets = [ 

2603 ( 

2604 # Dataset type name doesn't match any existing dataset types. 

2605 registry.queryDatasets("nonexistent", collections=...), 

2606 ["nonexistent"], 

2607 ), 

2608 ( 

2609 # Dataset type object isn't registered. 

2610 registry.queryDatasets( 

2611 DatasetType( 

2612 "nonexistent", 

2613 dimensions=["instrument"], 

2614 universe=registry.dimensions, 

2615 storageClass="Image", 

2616 ), 

2617 collections=..., 

2618 ), 

2619 ["nonexistent"], 

2620 ), 

2621 ( 

2622 # No datasets of this type in this collection. 

2623 registry.queryDatasets("flat", collections=["biases"]), 

2624 ["flat", "biases"], 

2625 ), 

2626 ( 

2627 # No datasets of this type in this collection. 

2628 base_query.findDatasets("flat", collections=["biases"]), 

2629 ["flat", "biases"], 

2630 ), 

2631 ( 

2632 # No collections matching at all. 

2633 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2634 ["potato"], 

2635 ), 

2636 ] 

2637 with self.assertRaises(MissingDatasetTypeError): 

2638 # Dataset type name doesn't match any existing dataset types. 

2639 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...) 

2640 with self.assertRaises(MissingDatasetTypeError): 

2641 # Dataset type name doesn't match any existing dataset types. 

2642 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...) 

2643 for query, snippets in queries_and_snippets: 

2644 self.assertFalse(query.any(execute=False, exact=False)) 

2645 self.assertFalse(query.any(execute=True, exact=False)) 

2646 self.assertFalse(query.any(execute=True, exact=True)) 

2647 self.assertEqual(query.count(exact=False), 0) 

2648 self.assertEqual(query.count(exact=True), 0) 

2649 messages = list(query.explain_no_results()) 

2650 self.assertTrue(messages) 

2651 # Want all expected snippets to appear in at least one message. 

2652 self.assertTrue( 

2653 any( 

2654 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2655 ), 

2656 messages, 

2657 ) 

2658 

2659 # Wildcards on dataset types are not permitted in queryDataIds. 

2660 with self.assertRaises(DatasetTypeExpressionError): 

2661 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...) 

2662 

2663 # These queries yield no results due to problems that can be identified 

2664 # by cheap follow-up queries, yielding helpful diagnostics. 

2665 for query, snippets in [ 

2666 ( 

2667 # No records for one of the involved dimensions. 

2668 registry.queryDataIds(["subfilter"]), 

2669 ["no rows", "subfilter"], 

2670 ), 

2671 ( 

2672 # No records for one of the involved dimensions. 

2673 registry.queryDimensionRecords("subfilter"), 

2674 ["no rows", "subfilter"], 

2675 ), 

2676 ]: 

2677 self.assertFalse(query.any(execute=True, exact=False)) 

2678 self.assertFalse(query.any(execute=True, exact=True)) 

2679 self.assertEqual(query.count(exact=True), 0) 

2680 messages = list(query.explain_no_results()) 

2681 self.assertTrue(messages) 

2682 # Want all expected snippets to appear in at least one message. 

2683 self.assertTrue( 

2684 any( 

2685 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2686 ), 

2687 messages, 

2688 ) 

2689 

2690 # This query yields four overlaps in the database, but one is filtered 

2691 # out in postprocessing. The count queries aren't accurate because 

2692 # they don't account for duplication that happens due to an internal 

2693 # join against commonSkyPix. 

2694 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2695 self.assertEqual( 

2696 { 

2697 DataCoordinate.standardize( 

2698 instrument="Cam1", 

2699 skymap="SkyMap1", 

2700 visit=v, 

2701 tract=t, 

2702 universe=registry.dimensions, 

2703 ) 

2704 for v, t in [(1, 0), (2, 0), (2, 1)] 

2705 }, 

2706 set(query3), 

2707 ) 

2708 self.assertTrue(query3.any(execute=False, exact=False)) 

2709 self.assertTrue(query3.any(execute=True, exact=False)) 

2710 self.assertTrue(query3.any(execute=True, exact=True)) 

2711 self.assertGreaterEqual(query3.count(exact=False), 4) 

2712 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3) 

2713 self.assertFalse(list(query3.explain_no_results())) 

2714 # This query yields overlaps in the database, but all are filtered 

2715 # out in postprocessing. The count queries again aren't very useful. 

2716 # We have to use `where=` here to avoid an optimization that 

2717 # (currently) skips the spatial postprocess-filtering because it 

2718 # recognizes that no spatial join is necessary. That's not ideal, but 

2719 # fixing it is out of scope for this ticket. 

2720 query4 = registry.queryDataIds( 

2721 ["visit", "tract"], 

2722 instrument="Cam1", 

2723 skymap="SkyMap1", 

2724 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2725 ) 

2726 self.assertFalse(set(query4)) 

2727 self.assertTrue(query4.any(execute=False, exact=False)) 

2728 self.assertTrue(query4.any(execute=True, exact=False)) 

2729 self.assertFalse(query4.any(execute=True, exact=True)) 

2730 self.assertGreaterEqual(query4.count(exact=False), 1) 

2731 self.assertEqual(query4.count(exact=True, discard=True), 0) 

2732 messages = query4.explain_no_results() 

2733 self.assertTrue(messages) 

2734 self.assertTrue(any("overlap" in message for message in messages)) 

2735 # This query should yield results from one dataset type but not the 

2736 # other, which is not registered. 

2737 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"]) 

2738 self.assertTrue(set(query5)) 

2739 self.assertTrue(query5.any(execute=False, exact=False)) 

2740 self.assertTrue(query5.any(execute=True, exact=False)) 

2741 self.assertTrue(query5.any(execute=True, exact=True)) 

2742 self.assertGreaterEqual(query5.count(exact=False), 1) 

2743 self.assertGreaterEqual(query5.count(exact=True), 1) 

2744 self.assertFalse(list(query5.explain_no_results())) 

2745 # This query applies a selection that yields no results, fully in the 

2746 # database. Explaining why it fails involves traversing the relation 

2747 # tree and running a LIMIT 1 query at each level that has the potential 

2748 # to remove rows. 

2749 query6 = registry.queryDimensionRecords( 

2750 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1" 

2751 ) 

2752 self.assertEqual(query6.count(exact=True), 0) 

2753 messages = query6.explain_no_results() 

2754 self.assertTrue(messages) 

2755 self.assertTrue(any("no-purpose" in message for message in messages)) 

2756 

2757 def testQueryDataIdsExpressionError(self): 

2758 """Test error checking of 'where' expressions in queryDataIds.""" 

2759 registry = self.makeRegistry() 

2760 self.loadData(registry, "base.yaml") 

2761 bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")} 

2762 with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."): 

2763 registry.queryDataIds(["detector"], where="foo.bar = 12") 

2764 with self.assertRaisesRegex( 

2765 LookupError, "Dimension element name cannot be inferred in this context." 

2766 ): 

2767 registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind) 

2768 

2769 def testQueryDataIdsOrderBy(self): 

2770 """Test order_by and limit on result returned by queryDataIds().""" 

2771 registry = self.makeRegistry() 

2772 self.loadData(registry, "base.yaml") 

2773 self.loadData(registry, "datasets.yaml") 

2774 self.loadData(registry, "spatial.yaml") 

2775 

2776 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

2777 return registry.queryDataIds( 

2778 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

2779 ) 

2780 

2781 Test = namedtuple( 

2782 "testQueryDataIdsOrderByTest", 

2783 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

2784 defaults=(None, None, None), 

2785 ) 

2786 

2787 test_data = ( 

2788 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2789 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

2790 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

2791 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

2792 Test( 

2793 "tract.id,visit.id", 

2794 "tract,visit", 

2795 ((0, 1), (0, 1), (0, 2)), 

2796 limit=(3,), 

2797 ), 

2798 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

2799 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

2800 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

2801 Test( 

2802 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

2803 ), 

2804 Test( 

2805 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

2806 ), 

2807 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2808 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2809 Test( 

2810 "tract,-timespan.begin,timespan.end", 

2811 "tract,visit", 

2812 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

2813 ), 

2814 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

2815 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

2816 Test( 

2817 "tract,detector", 

2818 "tract,detector", 

2819 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2820 datasets="flat", 

2821 collections="imported_r", 

2822 ), 

2823 Test( 

2824 "tract,detector.full_name", 

2825 "tract,detector", 

2826 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2827 datasets="flat", 

2828 collections="imported_r", 

2829 ), 

2830 Test( 

2831 "tract,detector.raft,detector.name_in_raft", 

2832 "tract,detector", 

2833 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2834 datasets="flat", 

2835 collections="imported_r", 

2836 ), 

2837 ) 

2838 

2839 for test in test_data: 

2840 order_by = test.order_by.split(",") 

2841 keys = test.keys.split(",") 

2842 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

2843 if test.limit is not None: 

2844 query = query.limit(*test.limit) 

2845 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

2846 self.assertEqual(dataIds, test.result) 

2847 

2848 # and materialize 

2849 query = do_query(keys).order_by(*order_by) 

2850 if test.limit is not None: 

2851 query = query.limit(*test.limit) 

2852 with self.assertRaises(RelationalAlgebraError): 

2853 with query.materialize(): 

2854 pass 

2855 

2856 # errors in a name 

2857 for order_by in ("", "-"): 

2858 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2859 list(do_query().order_by(order_by)) 

2860 

2861 for order_by in ("undimension.name", "-undimension.name"): 

2862 with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"): 

2863 list(do_query().order_by(order_by)) 

2864 

2865 for order_by in ("attract", "-attract"): 

2866 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

2867 list(do_query().order_by(order_by)) 

2868 

2869 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

2870 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

2871 

2872 with self.assertRaisesRegex( 

2873 ValueError, 

2874 r"Timespan exists in more than one dimension element \(exposure, visit\); " 

2875 r"qualify timespan with specific dimension name\.", 

2876 ): 

2877 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

2878 

2879 with self.assertRaisesRegex( 

2880 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

2881 ): 

2882 list(do_query("tract").order_by("timespan.begin")) 

2883 

2884 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

2885 list(do_query("tract").order_by("tract.timespan.begin")) 

2886 

2887 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

2888 list(do_query("tract").order_by("tract.name")) 

2889 

2890 with self.assertRaisesRegex( 

2891 ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?" 

2892 ): 

2893 list(do_query("visit").order_by("timestamp.begin")) 

2894 
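
# A short sketch (illustrative only) of the conventions exercised above: a 

# leading "-" requests descending order, and limit() takes a row limit plus 

# an optional offset, matching the Test tuples' limit=(n, offset) pairs. 

def testQueryDataIdsLimitOffset(self): 

"""Sketch: limit(n, offset) returns rows offset..offset+n of the 

ordered results. 

""" 

registry = self.makeRegistry() 

self.loadData(registry, "base.yaml") 

self.loadData(registry, "datasets.yaml") 

self.loadData(registry, "spatial.yaml") 

def ordered(limit=None): 

query = registry.queryDataIds( 

["visit", "tract"], instrument="Cam1", skymap="SkyMap1" 

).order_by("tract", "visit") 

if limit is not None: 

query = query.limit(*limit) 

return [(dataId["tract"], dataId["visit"]) for dataId in query] 

# Rows 4-6 of the full ordering, per the limit=(3, 3) case above. 

self.assertEqual(ordered(limit=(3, 3)), ordered()[3:6]) 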

2895 def testQueryDataIdsGovernorExceptions(self): 

2896 """Test exceptions raised by queryDataIds() for incorrect governors.""" 

2897 registry = self.makeRegistry() 

2898 self.loadData(registry, "base.yaml") 

2899 self.loadData(registry, "datasets.yaml") 

2900 self.loadData(registry, "spatial.yaml") 

2901 

2902 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs): 

2903 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs) 

2904 

2905 Test = namedtuple( 

2906 "testQueryDataIdExceptionsTest", 

2907 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"), 

2908 defaults=(None, None, None, {}, None, 0), 

2909 ) 

2910 

2911 test_data = ( 

2912 Test("tract,visit", count=6), 

2913 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2914 Test( 

2915 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError 

2916 ), 

2917 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2918 Test( 

2919 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError 

2920 ), 

2921 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6), 

2922 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError), 

2923 Test( 

2924 "tract,visit", 

2925 where="instrument=cam AND skymap=map", 

2926 bind={"cam": "Cam1", "map": "SkyMap1"}, 

2927 count=6, 

2928 ), 

2929 Test( 

2930 "tract,visit", 

2931 where="instrument=cam AND skymap=map", 

2932 bind={"cam": "Cam", "map": "SkyMap"}, 

2933 exception=DataIdValueError, 

2934 ), 

2935 ) 

2936 

2937 for test in test_data: 

2938 dimensions = test.dimensions.split(",") 

2939 if test.exception: 

2940 with self.assertRaises(test.exception): 

2941 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() 

2942 else: 

2943 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2944 self.assertEqual(query.count(discard=True), test.count) 

2945 

2946 # and materialize 

2947 if test.exception: 

2948 with self.assertRaises(test.exception): 

2949 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2950 with query.materialize() as materialized: 

2951 materialized.count(discard=True) 

2952 else: 

2953 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2954 with query.materialize() as materialized: 

2955 self.assertEqual(materialized.count(discard=True), test.count) 

2956 

2957 def testQueryDimensionRecordsOrderBy(self): 

2958 """Test order_by and limit on result returned by 

2959 queryDimensionRecords(). 

2960 """ 

2961 registry = self.makeRegistry() 

2962 self.loadData(registry, "base.yaml") 

2963 self.loadData(registry, "datasets.yaml") 

2964 self.loadData(registry, "spatial.yaml") 

2965 

2966 def do_query(element, datasets=None, collections=None): 

2967 return registry.queryDimensionRecords( 

2968 element, instrument="Cam1", datasets=datasets, collections=collections 

2969 ) 

2970 

2971 query = do_query("detector") 

2972 self.assertEqual(len(list(query)), 4) 

2973 

2974 Test = namedtuple( 

2975 "testQueryDataIdsOrderByTest", 

2976 ("element", "order_by", "result", "limit", "datasets", "collections"), 

2977 defaults=(None, None, None), 

2978 ) 

2979 

2980 test_data = ( 

2981 Test("detector", "detector", (1, 2, 3, 4)), 

2982 Test("detector", "-detector", (4, 3, 2, 1)), 

2983 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)), 

2984 Test("detector", "-detector.purpose", (4,), limit=(1,)), 

2985 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)), 

2986 Test("visit", "visit", (1, 2)), 

2987 Test("visit", "-visit.id", (2, 1)), 

2988 Test("visit", "zenith_angle", (1, 2)), 

2989 Test("visit", "-visit.name", (2, 1)), 

2990 Test("visit", "day_obs,-timespan.begin", (2, 1)), 

2991 ) 

2992 

2993 for test in test_data: 

2994 order_by = test.order_by.split(",") 

2995 query = do_query(test.element).order_by(*order_by) 

2996 if test.limit is not None: 

2997 query = query.limit(*test.limit) 

2998 dataIds = tuple(rec.id for rec in query) 

2999 self.assertEqual(dataIds, test.result) 

3000 

3001 # errors in a name 

3002 for order_by in ("", "-"): 

3003 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

3004 list(do_query("detector").order_by(order_by)) 

3005 

3006 for order_by in ("undimension.name", "-undimension.name"): 

3007 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"): 

3008 list(do_query("detector").order_by(order_by)) 

3009 

3010 for order_by in ("attract", "-attract"): 

3011 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."): 

3012 list(do_query("detector").order_by(order_by)) 

3013 

3014 for order_by in ("timestamp.begin", "-timestamp.begin"): 

3015 with self.assertRaisesRegex( 

3016 ValueError, 

3017 r"Element name mismatch: 'timestamp' instead of 'visit'; " 

3018 r"perhaps you meant 'timespan.begin'\?", 

3019 ): 

3020 list(do_query("visit").order_by(order_by)) 

3021 

3022 def testQueryDimensionRecordsExceptions(self): 

3023 """Test exceptions raised by queryDimensionRecords().""" 

3024 registry = self.makeRegistry() 

3025 self.loadData(registry, "base.yaml") 

3026 self.loadData(registry, "datasets.yaml") 

3027 self.loadData(registry, "spatial.yaml") 

3028 

3029 result = registry.queryDimensionRecords("detector") 

3030 self.assertEqual(result.count(), 4) 

3031 result = registry.queryDimensionRecords("detector", instrument="Cam1") 

3032 self.assertEqual(result.count(), 4) 

3033 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"}) 

3034 self.assertEqual(result.count(), 4) 

3035 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'") 

3036 self.assertEqual(result.count(), 4) 

3037 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"}) 

3038 self.assertEqual(result.count(), 4) 

3039 

3040 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3041 result = registry.queryDimensionRecords("detector", instrument="NotCam1") 

3042 result.count() 

3043 

3044 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3045 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) 

3046 result.count() 

3047 

3048 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3049 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") 

3050 result.count() 

3051 

3052 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3053 result = registry.queryDimensionRecords( 

3054 "detector", where="instrument=instr", bind={"instr": "NotCam1"} 

3055 ) 

3056 result.count() 

3057 

3058 def testDatasetConstrainedDimensionRecordQueries(self): 

3059 """Test that queryDimensionRecords works even when given a dataset 

3060 constraint whose dimensions extend beyond the requested dimension 

3061 element's. 

3062 """ 

3063 registry = self.makeRegistry() 

3064 self.loadData(registry, "base.yaml") 

3065 self.loadData(registry, "datasets.yaml") 

3066 # Query for physical_filter dimension records, using a dataset type 

3067 # that has detector dimensions as well as physical_filter. 

3068 records = registry.queryDimensionRecords( 

3069 "physical_filter", 

3070 datasets=["flat"], 

3071 collections="imported_r", 

3072 ) 

3073 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"}) 

3074 # Trying to constrain by all dataset types is an error. 

3075 with self.assertRaises(TypeError): 

3076 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r")) 

3077 

3078 def testSkyPixDatasetQueries(self): 

3079 """Test that we can build queries involving skypix dimensions as long 

3080 as a dataset type that uses those dimensions is included. 

3081 """ 

3082 registry = self.makeRegistry() 

3083 self.loadData(registry, "base.yaml") 

3084 dataset_type = DatasetType( 

3085 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int" 

3086 ) 

3087 registry.registerDatasetType(dataset_type) 

3088 run = "r" 

3089 registry.registerRun(run) 

3090 # First try queries where there are no datasets; the concern is whether 

3091 # we can even build and execute these queries without raising, even 

3092 # when "doomed" query shortcuts are in play. 

3093 self.assertFalse( 

3094 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)) 

3095 ) 

3096 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run))) 

3097 # Now add a dataset and see that we can get it back. 

3098 htm7 = registry.dimensions.skypix["htm"][7].pixelization 

3099 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0]) 

3100 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run) 

3101 self.assertEqual( 

3102 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)), 

3103 {data_id}, 

3104 ) 

3105 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref}) 

3106 

3107 def testDatasetIdFactory(self): 

3108 """Simple test for DatasetIdFactory, mostly to catch potential changes 

3109 in its API. 

3110 """ 

3111 registry = self.makeRegistry() 

3112 factory = DatasetIdFactory() 

3113 dataset_type = DatasetType( 

3114 "datasetType", 

3115 dimensions=["detector", "instrument"], 

3116 universe=registry.dimensions, 

3117 storageClass="int", 

3118 ) 

3119 run = "run" 

3120 data_id = DataCoordinate.standardize( 

3121 instrument="Cam1", detector=1, dimensions=dataset_type.dimensions 

3122 ) 

3123 

3124 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE) 

3125 self.assertIsInstance(datasetId, uuid.UUID) 

3126 self.assertEqual(datasetId.version, 4) 

3127 

3128 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE) 

3129 self.assertIsInstance(datasetId, uuid.UUID) 

3130 self.assertEqual(datasetId.version, 5) 

3131 

3132 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN) 

3133 self.assertIsInstance(datasetId, uuid.UUID) 

3134 self.assertEqual(datasetId.version, 5) 

3135 
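
# A brief sketch (illustrative, not part of the original suite) of what the 

# UUID version checks above imply: the name-based (version 5) modes are 

# deterministic for identical inputs, while UNIQUE draws a fresh random 

# version-4 UUID on every call. 

def testDatasetIdFactoryDeterminism(self): 

"""Sketch: DATAID_TYPE_RUN ids repeat for the same inputs; UNIQUE 

ids (almost surely) do not. 

""" 

registry = self.makeRegistry() 

factory = DatasetIdFactory() 

dataset_type = DatasetType( 

"datasetType", 

dimensions=["detector", "instrument"], 

universe=registry.dimensions, 

storageClass="int", 

) 

data_id = DataCoordinate.standardize( 

instrument="Cam1", detector=1, dimensions=dataset_type.dimensions 

) 

a = factory.makeDatasetId("run", dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN) 

b = factory.makeDatasetId("run", dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN) 

self.assertEqual(a, b) 

self.assertNotEqual( 

factory.makeDatasetId("run", dataset_type, data_id, DatasetIdGenEnum.UNIQUE), 

factory.makeDatasetId("run", dataset_type, data_id, DatasetIdGenEnum.UNIQUE), 

) 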

3136 def testExposureQueries(self): 

3137 """Test query methods using arguments sourced from the exposure log 

3138 service. 

3139 

3140 The most complete test dataset currently available to daf_butler tests 

3141 is the hsc-rc2-subset.yaml export (which is unfortunately distinct from 

3142 the lsst/rc2_subset GitHub repo), but that does not have 'exposure' 

3143 dimension records as it was focused on providing nontrivial spatial 

3144 overlaps between visit+detector and tract+patch. So in this test we 

3145 need to translate queries that originally used the exposure dimension 

3146 to use the (very similar) visit dimension instead. 

3147 """ 

3148 registry = self.makeRegistry() 

3149 self.loadData(registry, "hsc-rc2-subset.yaml") 

3150 self.assertEqual( 

3151 [ 

3152 record.id 

3153 for record in registry.queryDimensionRecords("visit", instrument="HSC") 

3154 .order_by("id") 

3155 .limit(5) 

3156 ], 

3157 [318, 322, 326, 330, 332], 

3158 ) 

3159 self.assertEqual( 

3160 [ 

3161 data_id["visit"] 

3162 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("id").limit(5) 

3163 ], 

3164 [318, 322, 326, 330, 332], 

3165 ) 

3166 self.assertEqual( 

3167 [ 

3168 record.id 

3169 for record in registry.queryDimensionRecords("detector", instrument="HSC") 

3170 .order_by("full_name") 

3171 .limit(5) 

3172 ], 

3173 [73, 72, 71, 70, 65], 

3174 ) 

3175 self.assertEqual( 

3176 [ 

3177 data_id["detector"] 

3178 for data_id in registry.queryDataIds(["detector"], instrument="HSC") 

3179 .order_by("full_name") 

3180 .limit(5) 

3181 ], 

3182 [73, 72, 71, 70, 65], 

3183 ) 

3184 

3185 def test_long_query_names(self) -> None: 

3186 """Test that queries involving very long names are handled correctly. 

3187 

3188 This is especially important for PostgreSQL, which truncates identifiers 

3189 longer than 63 characters, but it's worth testing for all DBs. 

3190 """ 

3191 registry = self.makeRegistry() 

3192 name = "abcd" * 17 
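
# Note (illustrative): len(name) == 68 here, past PostgreSQL's 63-character 

# identifier limit, so SQL symbols derived from this name get truncated. 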

3193 registry.registerDatasetType( 

3194 DatasetType( 

3195 name, 

3196 dimensions=(), 

3197 storageClass="Exposure", 

3198 universe=registry.dimensions, 

3199 ) 

3200 ) 

3201 # Need to search more than one collection actually containing a 

3202 # matching dataset to avoid optimizations that sidestep bugs due to 

3203 # truncation by making findFirst=True a no-op. 

3204 run1 = "run1" 

3205 registry.registerRun(run1) 

3206 run2 = "run2" 

3207 registry.registerRun(run2) 

3208 (ref1,) = registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run1) 

3209 registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run2) 

3210 self.assertEqual( 

3211 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)), 

3212 {ref1}, 

3213 ) 

3214 

3215 def test_skypix_constraint_queries(self) -> None: 

3216 """Test queries spatially constrained by a skypix data ID.""" 

3217 registry = self.makeRegistry() 

3218 self.loadData(registry, "hsc-rc2-subset.yaml") 

3219 patch_regions = { 

3220 (data_id["tract"], data_id["patch"]): data_id.region 

3221 for data_id in registry.queryDataIds(["patch"]).expanded() 

3222 } 

3223 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"] 

3224 # This check ensures the test doesn't become trivial due to a config 

3225 # change; if it does, just pick a different HTM level. 

3226 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix) 

3227 # Gather all skypix IDs that definitely overlap at least one of these 

3228 # patches. 

3229 relevant_skypix_ids = lsst.sphgeom.RangeSet() 

3230 for patch_region in patch_regions.values(): 

3231 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region) 

3232 # Look for a "nontrivial" skypix_id that overlaps at least one patch 

3233 # and does not overlap at least one other patch. 

3234 for skypix_id in itertools.chain.from_iterable( 

3235 range(begin, end) for begin, end in relevant_skypix_ids 

3236 ): 

3237 skypix_region = skypix_dimension.pixelization.pixel(skypix_id) 

3238 overlapping_patches = { 

3239 patch_key 

3240 for patch_key, patch_region in patch_regions.items() 

3241 if not patch_region.isDisjointFrom(skypix_region) 

3242 } 

3243 if overlapping_patches and overlapping_patches != patch_regions.keys(): 

3244 break 

3245 else: 

3246 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.") 

3247 self.assertEqual( 

3248 { 

3249 (data_id["tract"], data_id["patch"]) 

3250 for data_id in registry.queryDataIds( 

3251 ["patch"], 

3252 dataId={skypix_dimension.name: skypix_id}, 

3253 ) 

3254 }, 

3255 overlapping_patches, 

3256 ) 

3257 # Test that a three-way join that includes the common skypix system in 

3258 # the dimensions doesn't generate redundant join terms in the query. 

3259 full_data_ids = set( 

3260 registry.queryDataIds( 

3261 ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC" 

3262 ).expanded() 

3263 ) 

3264 self.assertGreater(len(full_data_ids), 0) 

3265 for data_id in full_data_ids: 

3266 self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region)) 

3267 self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region)) 

3268 

3269 def test_spatial_constraint_queries(self) -> None: 

3270 """Test queries in which one spatial dimension in the constraint (data 

3271 ID or ``where`` string) constrains a different spatial dimension in the 

3272 query result columns. 

3273 """ 

3274 registry = self.makeRegistry() 

3275 self.loadData(registry, "hsc-rc2-subset.yaml") 

3276 patch_regions = { 

3277 (data_id["tract"], data_id["patch"]): data_id.region 

3278 for data_id in registry.queryDataIds(["patch"]).expanded() 

3279 } 

3280 observation_regions = { 

3281 (data_id["visit"], data_id["detector"]): data_id.region 

3282 for data_id in registry.queryDataIds(["visit", "detector"]).expanded() 

3283 } 

3284 all_combos = { 

3285 (patch_key, observation_key) 

3286 for patch_key, observation_key in itertools.product(patch_regions, observation_regions) 

3287 } 

3288 overlapping_combos = { 

3289 (patch_key, observation_key) 

3290 for patch_key, observation_key in all_combos 

3291 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key]) 

3292 } 

3293 # Check a direct spatial join with no constraint first. 

3294 self.assertEqual( 

3295 { 

3296 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"])) 

3297 for data_id in registry.queryDataIds(["patch", "visit", "detector"]) 

3298 }, 

3299 overlapping_combos, 

3300 ) 

3301 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set) 

3302 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set) 

3303 for patch_key, observation_key in overlapping_combos: 

3304 overlaps_by_patch[patch_key].add(observation_key) 

3305 overlaps_by_observation[observation_key].add(patch_key) 

3306 # Find a patch and an observation that each overlap at least one region 

3307 # of the other kind, but not all of them. 

3308 nontrivial_patch = next( 

3309 iter( 

3310 patch_key 

3311 for patch_key, observation_keys in overlaps_by_patch.items() 

3312 if observation_keys and observation_keys != observation_regions.keys() 

3313 ) 

3314 ) 

3315 nontrivial_observation = next( 

3316 iter( 

3317 observation_key 

3318 for observation_key, patch_keys in overlaps_by_observation.items() 

3319 if patch_keys and patch_keys != patch_regions.keys() 

3320 ) 

3321 ) 

3322 # Use the nontrivial patches and observations as constraints on the 

3323 # other dimensions in various ways, first via a 'where' expression. 

3324 # It's better in general to use 'bind' instead of f-strings, but these 

3325 # are all integers so there are no quoting concerns. 

3326 self.assertEqual( 

3327 { 

3328 (data_id["visit"], data_id["detector"]) 

3329 for data_id in registry.queryDataIds( 

3330 ["visit", "detector"], 

3331 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}", 

3332 skymap="hsc_rings_v1", 

3333 ) 

3334 }, 

3335 overlaps_by_patch[nontrivial_patch], 

3336 ) 

3337 self.assertEqual( 

3338 { 

3339 (data_id["tract"], data_id["patch"]) 

3340 for data_id in registry.queryDataIds( 

3341 ["patch"], 

3342 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}", 

3343 instrument="HSC", 

3344 ) 

3345 }, 

3346 overlaps_by_observation[nontrivial_observation], 

3347 ) 

3348 # and then via the dataId argument. 

3349 self.assertEqual( 

3350 { 

3351 (data_id["visit"], data_id["detector"]) 

3352 for data_id in registry.queryDataIds( 

3353 ["visit", "detector"], 

3354 dataId={ 

3355 "tract": nontrivial_patch[0], 

3356 "patch": nontrivial_patch[1], 

3357 }, 

3358 skymap="hsc_rings_v1", 

3359 ) 

3360 }, 

3361 overlaps_by_patch[nontrivial_patch], 

3362 ) 

3363 self.assertEqual( 

3364 { 

3365 (data_id["tract"], data_id["patch"]) 

3366 for data_id in registry.queryDataIds( 

3367 ["patch"], 

3368 dataId={ 

3369 "visit": nontrivial_observation[0], 

3370 "detector": nontrivial_observation[1], 

3371 }, 

3372 instrument="HSC", 

3373 ) 

3374 }, 

3375 overlaps_by_observation[nontrivial_observation], 

3376 ) 

3377 

3378 def test_query_projection_drop_postprocessing(self) -> None: 

3379 """Test that projections and deduplications on query objects can 

3380 drop post-query region filtering to ensure the query remains in 

3381 the SQL engine. 

3382 """ 

3383 registry = self.makeRegistry() 

3384 self.loadData(registry, "base.yaml") 

3385 self.loadData(registry, "spatial.yaml") 

3386 

3387 def pop_transfer(tree: Relation) -> Relation: 

3388 """If a relation tree terminates with a transfer to a new engine, 

3389 return the relation prior to that transfer. If not, return the 

3390 original relation. 

3391 

3392 Parameters 

3393 ---------- 

3394 tree : `Relation` 

3395 The relation tree to modify. 

3396 """ 

3397 match tree: 

3398 case Transfer(target=target): 

3399 return target 

3400 case _: 

3401 return tree 

3402 

3403 # There's no public way to get a Query object yet, so we get one from a 

3404 # DataCoordinateQueryResults private attribute. When a public API is 

3405 # available this test should use it. 

3406 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query 

3407 # We expect this query to terminate in the iteration engine originally, 

3408 # because region-filtering is necessary. 

3409 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine) 

3410 # If we deduplicate, we usually have to do that downstream of the 

3411 # filtering. That means the deduplication has to happen in the 

3412 # iteration engine. 

3413 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine) 

3414 # If we pass drop_postprocessing, we instead drop the region filtering 

3415 # so the deduplication can happen in SQL (though there might still be 

3416 # a transfer to iteration at the tail of the tree that we can ignore; 

3417 # that's what the pop_transfer takes care of here). 

3418 self.assertIsInstance( 

3419 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine, 

3420 sql.Engine, 

3421 ) 

3422 

3423 def test_query_find_datasets_drop_postprocessing(self) -> None: 

3424 """Test that DataCoordinateQueryResults.findDatasets avoids commutator 

3425 problems with the FindFirstDataset relation operation. 

3426 """ 

3427 # Setup: load some visit, tract, and patch records, and insert two 

3428 # datasets with dimensions {visit, patch}, with one in each of two 

3429 # RUN collections. 

3430 registry = self.makeRegistry() 

3431 self.loadData(registry, "base.yaml") 

3432 self.loadData(registry, "spatial.yaml") 

3433 storage_class = StorageClass("Warpy") 

3434 registry.storageClasses.registerStorageClass(storage_class) 

3435 dataset_type = DatasetType( 

3436 "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions 

3437 ) 

3438 registry.registerDatasetType(dataset_type) 

3439 (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1) 

3440 registry.registerRun("run1") 

3441 registry.registerRun("run2") 

3442 (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1") 

3443 (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2") 

3444 # Query for the dataset using queryDataIds(...).findDatasets(...) 

3445 # against only one of the two collections. This should work even 

3446 # though the relation returned by queryDataIds ends with 

3447 # iteration-engine region-filtering, because we can recognize before 

3448 # running the query that there is only one collection to search and 

3449 # hence the (default) findFirst=True is irrelevant, and joining in the 

3450 # dataset query commutes past the iteration-engine postprocessing. 

3451 query1 = registry.queryDataIds( 

3452 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"] 

3453 ) 

3454 self.assertEqual( 

3455 set(query1.findDatasets(dataset_type.name, collections=["run1"])), 

3456 {ref1}, 

3457 ) 

3458 # Query for the dataset using queryDataIds(...).findDatasets(...) 

3459 # against both collections. This can only work if the FindFirstDataset 

3460 # operation can be commuted past the iteration-engine postprocessing into SQL. 

3461 query2 = registry.queryDataIds( 

3462 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"] 

3463 ) 

3464 self.assertEqual( 

3465 set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])), 

3466 {ref2}, 

3467 ) 

3468 

3469 def test_query_empty_collections(self) -> None: 

3470 """Test for registry query methods with empty collections. The methods 

3471 should return an empty result set (or None when applicable) and provide 

3472 "doomed" diagnostics. 

3473 """ 

3474 registry = self.makeRegistry() 

3475 self.loadData(registry, "base.yaml") 

3476 self.loadData(registry, "datasets.yaml") 

3477 
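
# As a reminder of the conventions exercised below: collections=... 

# (Ellipsis) means "search all collections", while collections=[] is an 

# explicitly empty search path, so those queries are doomed by construction. 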

3478 # Tests for registry.findDataset() 

3479 with self.assertRaises(NoDefaultCollectionError): 

3480 registry.findDataset("bias", instrument="Cam1", detector=1) 

3481 self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...)) 

3482 self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[])) 

3483 

3484 # Tests for registry.queryDatasets() 

3485 with self.assertRaises(NoDefaultCollectionError): 

3486 registry.queryDatasets("bias") 

3487 self.assertTrue(list(registry.queryDatasets("bias", collections=...))) 

3488 

3489 result = registry.queryDatasets("bias", collections=[]) 

3490 self.assertEqual(len(list(result)), 0) 

3491 messages = list(result.explain_no_results()) 

3492 self.assertTrue(messages) 

3493 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3494 

3495 # Tests for registry.queryDataIds() 

3496 with self.assertRaises(NoDefaultCollectionError): 

3497 registry.queryDataIds("detector", datasets="bias") 

3498 self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...))) 

3499 

3500 result = registry.queryDataIds("detector", datasets="bias", collections=[]) 

3501 self.assertEqual(len(list(result)), 0) 

3502 messages = list(result.explain_no_results()) 

3503 self.assertTrue(messages) 

3504 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3505 

3506 # Tests for registry.queryDimensionRecords() 

3507 with self.assertRaises(NoDefaultCollectionError): 

3508 registry.queryDimensionRecords("detector", datasets="bias") 

3509 self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...))) 

3510 

3511 result = registry.queryDimensionRecords("detector", datasets="bias", collections=[]) 

3512 self.assertEqual(len(list(result)), 0) 

3513 messages = list(result.explain_no_results()) 

3514 self.assertTrue(messages) 

3515 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3516 

3517 def test_dataset_followup_spatial_joins(self) -> None: 

3518 """Test queryDataIds(...).findRelatedDatasets(...) where a spatial join 

3519 is involved. 

3520 """ 

3521 registry = self.makeRegistry() 

3522 self.loadData(registry, "base.yaml") 

3523 self.loadData(registry, "spatial.yaml") 

3524 pvi_dataset_type = DatasetType( 

3525 "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions 

3526 ) 

3527 registry.registerDatasetType(pvi_dataset_type) 

3528 collection = "datasets" 

3529 registry.registerRun(collection) 

3530 (pvi1,) = registry.insertDatasets( 

3531 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 1}], run=collection 

3532 ) 

3533 (pvi2,) = registry.insertDatasets( 

3534 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 2}], run=collection 

3535 ) 

3536 (pvi3,) = registry.insertDatasets( 

3537 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 3}], run=collection 

3538 ) 

3539 self.assertEqual( 

3540 set( 

3541 registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0) 

3542 .expanded() 

3543 .findRelatedDatasets("pvi", [collection]) 

3544 ), 

3545 { 

3546 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi1), 

3547 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi2), 

3548 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=1), pvi2), 

3549 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi1), 

3550 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi2), 

3551 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi3), 

3552 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=3), pvi2), 

3553 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=4), pvi3), 

3554 }, 

3555 )