# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from ... import ddl

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Iterator
from datetime import datetime, timedelta
from typing import TYPE_CHECKING

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ..._dataset_association import DatasetAssociation
from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef
from ..._dataset_type import DatasetType
from ..._named import NamedValueSet
from ..._storage_class import StorageClass
from ..._timespan import Timespan
from ...dimensions import DataCoordinate, DataCoordinateSet, DimensionGraph, SkyPixDimension
from .._collection_summary import CollectionSummary
from .._collection_type import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from ..sql_registry import SqlRegistry


class RegistryTests(ABC):
    """Generic tests for the `SqlRegistry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: str | None = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: str | dict[str, str] | None = None
    """Name or configuration dictionary of the datasets manager class. If a
    subclass provides a value for this member, it overrides the name specified
    in the default configuration (`str` or `dict`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        Returned instance will be pre-configured based on the values of class
        members, and default-configured for all other parameters. Subclasses
        that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config
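
    # A minimal subclass sketch (illustrative only: the manager path and the
    # registry construction are assumptions, not part of this module):
    #
    #     class MyRegistryTests(RegistryTests, unittest.TestCase):
    #         collectionsManager = (
    #             "lsst.daf.butler.registry.collections.nameKey.NameKeyCollectionManager"
    #         )
    #
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data")
    #
    #         def makeRegistry(self, share_repo_with=None):
    #             config = self.makeRegistryConfig()
    #             ...  # construct and return a SqlRegistry backed by ``config``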

    @abstractmethod
    def makeRegistry(self, share_repo_with: SqlRegistry | None = None) -> SqlRegistry | None:
        """Return the SqlRegistry instance to be tested.

        Parameters
        ----------
        share_repo_with : `SqlRegistry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `SqlRegistry`
            New `SqlRegistry` instance, or `None` *only* if `share_repo_with`
            is not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()

    def loadData(self, registry: SqlRegistry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)
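
    # The filenames passed to ``loadData`` ("base.yaml", "datasets.yaml",
    # etc.) are export files assumed to live under ``getDataDir()``;
    # ``datastore=None`` imports registry content only, since these tests
    # exercise the registry without a datastore.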

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())

    def testOpaque(self):
        """Tests for `SqlRegistry.registerOpaqueTable`,
        `SqlRegistry.insertOpaqueData`, `SqlRegistry.fetchOpaqueData`, and
        `SqlRegistry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test very long IN clause which exceeds sqlite limit on number of
        # parameters. SQLite says the limit is 32k but it looks like it is
        # much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than 1k batch size, first with
        # duplicates, second has matching elements in different batches (after
        # sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `SqlRegistry.registerDatasetType` and
        `SqlRegistry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `SqlRegistry.insertDimensionData`,
        `SqlRegistry.syncDimensionData`, and `SqlRegistry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
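        # (It returns `True` when it inserts a new record, `False` when an
        # equivalent record already exists, and raises
        # ConflictingDefinitionError for a conflicting value with the same
        # primary key, as the assertions below demonstrate.)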
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `SqlRegistry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `SqlRegistry.insertDatasets`,
        `SqlRegistry.getDataset`, and `SqlRegistry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `SqlRegistry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, where two have the right dataset
        # type and one does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets-uuid.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that SqlRegistry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that SqlRegistry.removeDatasetType raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `SqlRegistry._importDatasets` with UUID dataset ID."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing same DatasetRef with different dataset ID is an error
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs; they can be re-imported multiple times.
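        # (Roughly: DATAID_TYPE derives a deterministic UUID5 from the dataset
        # type and data ID, so the same ID comes back regardless of run, while
        # DATAID_TYPE_RUN also folds the run name into the hash, making the ID
        # unique per run.)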
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make dataset ref with reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
                self.assertEqual(ref1.id, ref.id)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref])

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be included
        # when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "flat", "bias.wcs", "flat.photoCalib"},
                NamedValueSet(registry.queryDatasetTypes(components=True)).names,
            )
        # Use a pattern that can match either parent or components. Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
            )
        # This pattern matches only a component. In this case we also return
        # that component dataset type if components=None.
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=None)).names,
            )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
            )
        # Add a dataset type using a StorageClass that we'll then remove; check
        # that this does not affect our ability to query for dataset types
        # (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={
                "data1": registry.storageClasses.getStorageClass("StructuredDataDict"),
                "data2": registry.storageClasses.getStorageClass("StructuredDataDict"),
            },
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertWarns(FutureWarning):
            with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
                everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that. So if the next line
        # fails (i.e. "temporary.data" _is_ in everything.names), it means
        # this part of the test isn't doing anything, because the _unregister
        # call above isn't simulating the real-life case we want it to
        # simulate, in which different versions of daf_butler in entirely
        # different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp". This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        with self.assertWarns(FutureWarning):
            dataIds = registry.queryDataIds(
                ["detector"],
                datasets=["bias.wcs"],
                collections=collection,
            ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        with self.assertWarns(FutureWarning):
            childRefs2 = set(
                registry.queryDatasets(
                    "bias.wcs",
                    collections=collection,
                )
            )
        self.assertEqual({ref.datasetType for ref in childRefs2}, {childType})
        self.assertEqual({ref.dataId for ref in childRefs2}, set(dataIds))

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
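        # (Children of a chained collection are searched in the order given,
        # so tag1 shadows run2 here for any data IDs present in both.)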
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # Search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2. That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that `SqlRegistry.setCollectionChain` obeys its 'flatten'
        option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap.
        """
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, this is not in the dimensions, but it
        # is a part of the full expression so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test, we want
        # "band" in the test so also have to add physical_filter
        # dimensions
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!"))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
1229 self.assertEqual(len(rows), 3 * 4 * 1) # 3 tracts x 4 patches x 1 filter 

1230 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5)) 

1231 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7)) 

1232 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",)) 

1233 

1234 # Specifying a non-existent skymap raises an exception. 

1235 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

1236 rows = registry.queryDataIds( 

1237 dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'" 

1238 ).toSet() 

1239 

1240 def testSpatialJoin(self): 

1241 """Test queries that involve spatial overlap joins.""" 

1242 registry = self.makeRegistry() 

1243 self.loadData(registry, "hsc-rc2-subset.yaml") 

1244 

1245 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of 

1246 # the TopologicalFamily they belong to. We'll relate all elements in 

1247 # each family to all of the elements in each other family. 

1248 families = defaultdict(set) 

1249 # Dictionary of {element.name: {dataId: region}}. 

1250 regions = {} 

1251 for element in registry.dimensions.getDatabaseElements(): 

1252 if element.spatial is not None: 

1253 families[element.spatial.name].add(element) 

1254 regions[element.name] = { 

1255 record.dataId: record.region for record in registry.queryDimensionRecords(element) 

1256 } 

1257 

1258 # If this check fails, it's not necessarily a problem - it may just be 

1259 # a reasonable change to the default dimension definitions - but the 

1260 # test below depends on there being more than one family to do anything 

1261 # useful. 

1262 self.assertEqual(len(families), 2) 

1263 

1264 # Overlap DatabaseDimensionElements with each other. 

1265 for family1, family2 in itertools.combinations(families, 2): 

1266 for element1, element2 in itertools.product(families[family1], families[family2]): 

1267 graph = DimensionGraph.union(element1.graph, element2.graph) 

1268 # Construct expected set of overlapping data IDs via a 

1269 # brute-force comparison of the regions we've already fetched. 

1270 expected = { 

1271 DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph) 

1272 for (dataId1, region1), (dataId2, region2) in itertools.product( 

1273 regions[element1.name].items(), regions[element2.name].items() 

1274 ) 

1275 if not region1.isDisjointFrom(region2) 

1276 } 

1277 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1278 queried = set(registry.queryDataIds(graph)) 

1279 self.assertEqual(expected, queried) 

1280 

1281 # Overlap each DatabaseDimensionElement with the commonSkyPix system. 

1282 commonSkyPix = registry.dimensions.commonSkyPix 

1283 for elementName, these_regions in regions.items(): 

1284 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph) 

1285 expected = set() 

1286 for dataId, region in these_regions.items(): 

1287 for begin, end in commonSkyPix.pixelization.envelope(region): 

1288 expected.update( 

1289 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph) 

1290 for index in range(begin, end) 

1291 ) 

1292 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1293 queried = set(registry.queryDataIds(graph)) 

1294 self.assertEqual(expected, queried) 

1295 
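# Added aside (not part of the original test): a minimal sketch of the
# envelope() pattern the spatial checks above rely on, using real
# lsst.sphgeom calls with arbitrary example parameters. A pixelization's
# envelope() returns a RangeSet of half-open [begin, end) index ranges
# whose pixels together cover the region.
#
#     import lsst.sphgeom
#
#     pixelization = lsst.sphgeom.HtmPixelization(7)
#     region = lsst.sphgeom.Circle(
#         lsst.sphgeom.UnitVector3d(1.0, 0.0, 0.0),
#         lsst.sphgeom.Angle.fromDegrees(0.5),
#     )
#     indices = [
#         i for begin, end in pixelization.envelope(region) for i in range(begin, end)
#     ]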

1296 def testAbstractQuery(self): 

1297 """Test that we can run a query that just lists the known 

1298 bands. This is tricky because band is 

1299 backed by a query against physical_filter. 

1300 """ 

1301 registry = self.makeRegistry() 

1302 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1303 registry.insertDimensionData( 

1304 "physical_filter", 

1305 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1306 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1307 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1308 ) 

1309 rows = registry.queryDataIds(["band"]).toSet() 

1310 self.assertCountEqual( 

1311 rows, 

1312 [ 

1313 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1314 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1315 ], 

1316 ) 

1317 
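# Added note (illustrative only): in SQL terms the band-only query above
# amounts to something like
#
#     SELECT DISTINCT band FROM physical_filter;
#
# which is why three physical_filter records collapse to two band values.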

1318 def testAttributeManager(self): 

1319 """Test basic functionality of attribute manager.""" 

1320 # number of attributes with schema versions in a fresh database, 

1321 # 6 managers with 2 records per manager, plus config for dimensions 

1322 VERSION_COUNT = 6 * 2 + 1 

1323 

1324 registry = self.makeRegistry() 

1325 attributes = registry._managers.attributes 

1326 

1327 # check what get() returns for non-existing key 

1328 self.assertIsNone(attributes.get("attr")) 

1329 self.assertEqual(attributes.get("attr", ""), "") 

1330 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1331 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1332 

1333 # cannot store empty key or value 

1334 with self.assertRaises(ValueError): 

1335 attributes.set("", "value") 

1336 with self.assertRaises(ValueError): 

1337 attributes.set("attr", "") 

1338 

1339 # set value of non-existing key 

1340 attributes.set("attr", "value") 

1341 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1342 self.assertEqual(attributes.get("attr"), "value") 

1343 

1344 # update value of existing key 

1345 with self.assertRaises(ButlerAttributeExistsError): 

1346 attributes.set("attr", "value2") 

1347 

1348 attributes.set("attr", "value2", force=True) 

1349 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1350 self.assertEqual(attributes.get("attr"), "value2") 

1351 

1352 # delete existing key 

1353 self.assertTrue(attributes.delete("attr")) 

1354 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1355 

1356 # delete non-existing key 

1357 self.assertFalse(attributes.delete("non-attr")) 

1358 

1359 # store bunch of keys and get the list back 

1360 data = [ 

1361 ("version.core", "1.2.3"), 

1362 ("version.dimensions", "3.2.1"), 

1363 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1364 ] 

1365 for key, value in data: 

1366 attributes.set(key, value) 

1367 items = dict(attributes.items()) 

1368 for key, value in data: 

1369 self.assertEqual(items[key], value) 

1370 

1371 def testQueryDatasetsDeduplication(self): 

1372 """Test that the findFirst option to queryDatasets selects datasets 

1373 from collections in the order given. 

1374 """ 

1375 registry = self.makeRegistry() 

1376 self.loadData(registry, "base.yaml") 

1377 self.loadData(registry, "datasets.yaml") 

1378 self.assertCountEqual( 

1379 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1380 [ 

1381 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1382 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1383 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1384 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1385 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1386 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1387 ], 

1388 ) 

1389 self.assertCountEqual( 

1390 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1391 [ 

1392 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1393 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1394 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1395 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1396 ], 

1397 ) 

1398 self.assertCountEqual( 

1399 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1400 [ 

1401 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1402 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1403 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1404 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1405 ], 

1406 ) 

1407 
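# Added sketch (assumption: a registry-free toy model, not SqlRegistry's
# implementation) of the findFirst semantics asserted above: for each data
# ID, the dataset from the earliest collection in the search order wins.
#
#     def find_first(datasets_by_collection, collection_order):
#         """datasets_by_collection: {collection: {data_id: dataset}}."""
#         result = {}
#         for collection in collection_order:
#             for data_id, dataset in datasets_by_collection.get(collection, {}).items():
#                 result.setdefault(data_id, dataset)  # first hit wins
#         return result
#
#     # With the data loaded above, find_first(biases, ["imported_g",
#     # "imported_r"]) keeps detectors 1-3 from imported_g and detector 4
#     # from imported_r, matching the second assertion.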

1408 def testQueryResults(self): 

1409 """Test querying for data IDs and then manipulating the QueryResults 

1410 object returned to perform other queries. 

1411 """ 

1412 registry = self.makeRegistry() 

1413 self.loadData(registry, "base.yaml") 

1414 self.loadData(registry, "datasets.yaml") 

1415 bias = registry.getDatasetType("bias") 

1416 flat = registry.getDatasetType("flat") 

1417 # Obtain expected results from methods other than those we're testing 

1418 # here. That includes: 

1419 # - the dimensions of the data IDs we want to query: 

1420 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1421 # - the dimensions of some other data IDs we'll extract from that: 

1422 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1423 # - the data IDs we expect to obtain from the first queries: 

1424 expectedDataIds = DataCoordinateSet( 

1425 { 

1426 DataCoordinate.standardize( 

1427 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1428 ) 

1429 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1430 }, 

1431 graph=expectedGraph, 

1432 hasFull=False, 

1433 hasRecords=False, 

1434 ) 

1435 # - the flat datasets we expect to find from those data IDs, in just 

1436 # one collection (so deduplication is irrelevant): 

1437 expectedFlats = [ 

1438 registry.findDataset( 

1439 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1440 ), 

1441 registry.findDataset( 

1442 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1443 ), 

1444 registry.findDataset( 

1445 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1446 ), 

1447 ] 

1448 # - the data IDs we expect to extract from that: 

1449 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1450 # - the bias datasets we expect to find from those data IDs, after we 

1451 # subset-out the physical_filter dimension, both with duplicates: 

1452 expectedAllBiases = [ 

1453 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1454 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1455 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1456 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1457 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1458 ] 

1459 # - ...and without duplicates: 

1460 expectedDeduplicatedBiases = [ 

1461 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1462 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1463 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1464 ] 

1465 # Test against those expected results, using a "lazy" query for the 

1466 # data IDs (which re-executes that query each time we use it to do 

1467 # something new). 

1468 dataIds = registry.queryDataIds( 

1469 ["detector", "physical_filter"], 

1470 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1471 instrument="Cam1", 

1472 ) 

1473 self.assertEqual(dataIds.graph, expectedGraph) 

1474 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1475 self.assertCountEqual( 

1476 list( 

1477 dataIds.findDatasets( 

1478 flat, 

1479 collections=["imported_r"], 

1480 ) 

1481 ), 

1482 expectedFlats, 

1483 ) 

1484 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1485 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1486 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1487 self.assertCountEqual( 

1488 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1489 expectedAllBiases, 

1490 ) 

1491 self.assertCountEqual( 

1492 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1493 expectedDeduplicatedBiases, 

1494 ) 

1495 

1496 # Searching for a dataset with dimensions we had projected away 

1497 # restores those dimensions. 

1498 self.assertCountEqual( 

1499 list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)), 

1500 expectedFlats, 

1501 ) 

1502 

1503 # Use a component dataset type. 

1504 self.assertCountEqual( 

1505 [ 

1506 ref.makeComponentRef("image") 

1507 for ref in subsetDataIds.findDatasets( 

1508 bias, 

1509 collections=["imported_r", "imported_g"], 

1510 findFirst=False, 

1511 ) 

1512 ], 

1513 [ref.makeComponentRef("image") for ref in expectedAllBiases], 

1514 ) 

1515 

1516 # Use a named dataset type that does not exist and a dataset type 

1517 # object that does not exist. 

1518 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure") 

1519 

1520 # Test both string name and dataset type object. 

1521 test_type: str | DatasetType 

1522 for test_type, test_type_name in ( 

1523 (unknown_type, unknown_type.name), 

1524 (unknown_type.name, unknown_type.name), 

1525 ): 

1526 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name): 

1527 list( 

1528 subsetDataIds.findDatasets( 

1529 test_type, collections=["imported_r", "imported_g"], findFirst=True 

1530 ) 

1531 ) 

1532 

1533 # Materialize the bias dataset queries (only) by putting the results 

1534 # into temporary tables, then repeat those tests. 

1535 with subsetDataIds.findDatasets( 

1536 bias, collections=["imported_r", "imported_g"], findFirst=False 

1537 ).materialize() as biases: 

1538 self.assertCountEqual(list(biases), expectedAllBiases) 

1539 with subsetDataIds.findDatasets( 

1540 bias, collections=["imported_r", "imported_g"], findFirst=True 

1541 ).materialize() as biases: 

1542 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1543 # Materialize the data ID subset query, but not the dataset queries. 

1544 with subsetDataIds.materialize() as subsetDataIds: 

1545 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1546 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1547 self.assertCountEqual( 

1548 list( 

1549 subsetDataIds.findDatasets( 

1550 bias, collections=["imported_r", "imported_g"], findFirst=False 

1551 ) 

1552 ), 

1553 expectedAllBiases, 

1554 ) 

1555 self.assertCountEqual( 

1556 list( 

1557 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1558 ), 

1559 expectedDeduplicatedBiases, 

1560 ) 

1561 # Materialize the dataset queries, too. 

1562 with subsetDataIds.findDatasets( 

1563 bias, collections=["imported_r", "imported_g"], findFirst=False 

1564 ).materialize() as biases: 

1565 self.assertCountEqual(list(biases), expectedAllBiases) 

1566 with subsetDataIds.findDatasets( 

1567 bias, collections=["imported_r", "imported_g"], findFirst=True 

1568 ).materialize() as biases: 

1569 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1570 # Materialize the original query, but none of the follow-up queries. 

1571 with dataIds.materialize() as dataIds: 

1572 self.assertEqual(dataIds.graph, expectedGraph) 

1573 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1574 self.assertCountEqual( 

1575 list( 

1576 dataIds.findDatasets( 

1577 flat, 

1578 collections=["imported_r"], 

1579 ) 

1580 ), 

1581 expectedFlats, 

1582 ) 

1583 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1584 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1585 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1586 self.assertCountEqual( 

1587 list( 

1588 subsetDataIds.findDatasets( 

1589 bias, collections=["imported_r", "imported_g"], findFirst=False 

1590 ) 

1591 ), 

1592 expectedAllBiases, 

1593 ) 

1594 self.assertCountEqual( 

1595 list( 

1596 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1597 ), 

1598 expectedDeduplicatedBiases, 

1599 ) 

1600 # Materialize just the bias dataset queries. 

1601 with subsetDataIds.findDatasets( 

1602 bias, collections=["imported_r", "imported_g"], findFirst=False 

1603 ).materialize() as biases: 

1604 self.assertCountEqual(list(biases), expectedAllBiases) 

1605 with subsetDataIds.findDatasets( 

1606 bias, collections=["imported_r", "imported_g"], findFirst=True 

1607 ).materialize() as biases: 

1608 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1609 # Materialize the subset data ID query, but not the dataset 

1610 # queries. 

1611 with subsetDataIds.materialize() as subsetDataIds: 

1612 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1613 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1614 self.assertCountEqual( 

1615 list( 

1616 subsetDataIds.findDatasets( 

1617 bias, collections=["imported_r", "imported_g"], findFirst=False 

1618 ) 

1619 ), 

1620 expectedAllBiases, 

1621 ) 

1622 self.assertCountEqual( 

1623 list( 

1624 subsetDataIds.findDatasets( 

1625 bias, collections=["imported_r", "imported_g"], findFirst=True 

1626 ) 

1627 ), 

1628 expectedDeduplicatedBiases, 

1629 ) 

1630 # Materialize the bias dataset queries, too, so now we're 

1631 # materializing every single step. 

1632 with subsetDataIds.findDatasets( 

1633 bias, collections=["imported_r", "imported_g"], findFirst=False 

1634 ).materialize() as biases: 

1635 self.assertCountEqual(list(biases), expectedAllBiases) 

1636 with subsetDataIds.findDatasets( 

1637 bias, collections=["imported_r", "imported_g"], findFirst=True 

1638 ).materialize() as biases: 

1639 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1640 
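# Added usage note (illustrative): the materialize() pattern exercised
# above writes query results into a temporary table for the duration of
# the context, so follow-up operations read stored rows instead of
# re-executing the original query:
#
#     with registry.queryDataIds(["detector"]).materialize() as data_ids:
#         ...  # every use of data_ids here reads the temporary table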

1641 def testStorageClassPropagation(self): 

1642 """Test that queries for datasets respect the storage class passed in 

1643 as part of a full dataset type. 

1644 """ 

1645 registry = self.makeRegistry() 

1646 self.loadData(registry, "base.yaml") 

1647 dataset_type_in_registry = DatasetType( 

1648 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions 

1649 ) 

1650 registry.registerDatasetType(dataset_type_in_registry) 

1651 run = "run1" 

1652 registry.registerRun(run) 

1653 (inserted_ref,) = registry.insertDatasets( 

1654 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run 

1655 ) 

1656 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry) 

1657 query_dataset_type = DatasetType( 

1658 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions 

1659 ) 

1660 self.assertNotEqual(dataset_type_in_registry, query_dataset_type) 

1661 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run]) 

1662 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore 

1663 (query_datasets_ref,) = query_datasets_result 

1664 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type) 

1665 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets( 

1666 query_dataset_type, collections=[run] 

1667 ) 

1668 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type) 

1669 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result 

1670 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type) 

1671 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type) 

1672 self.assertEqual(list(query_dataset_types_result), [query_dataset_type]) 

1673 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run]) 

1674 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type) 

1675 

1676 def testEmptyDimensionsQueries(self): 

1677 """Test Query and QueryResults objects in the case where there are no 

1678 dimensions. 

1679 """ 

1680 # Set up test data: one dataset type, two runs, one dataset in each. 

1681 registry = self.makeRegistry() 

1682 self.loadData(registry, "base.yaml") 

1683 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1684 registry.registerDatasetType(schema) 

1685 dataId = DataCoordinate.makeEmpty(registry.dimensions) 

1686 run1 = "run1" 

1687 run2 = "run2" 

1688 registry.registerRun(run1) 

1689 registry.registerRun(run2) 

1690 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1691 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1692 # Query directly for both of the datasets, and each one, one at a time. 

1693 self.checkQueryResults( 

1694 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1695 ) 

1696 self.checkQueryResults( 

1697 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1698 [dataset1], 

1699 ) 

1700 self.checkQueryResults( 

1701 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1702 [dataset2], 

1703 ) 

1704 # Query for data IDs with no dimensions. 

1705 dataIds = registry.queryDataIds([]) 

1706 self.checkQueryResults(dataIds, [dataId]) 

1707 # Use queried data IDs to find the datasets. 

1708 self.checkQueryResults( 

1709 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1710 [dataset1, dataset2], 

1711 ) 

1712 self.checkQueryResults( 

1713 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1714 [dataset1], 

1715 ) 

1716 self.checkQueryResults( 

1717 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1718 [dataset2], 

1719 ) 

1720 # Now materialize the data ID query results and repeat those tests. 

1721 with dataIds.materialize() as dataIds: 

1722 self.checkQueryResults(dataIds, [dataId]) 

1723 self.checkQueryResults( 

1724 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1725 [dataset1], 

1726 ) 

1727 self.checkQueryResults( 

1728 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1729 [dataset2], 

1730 ) 

1731 # Query for non-empty data IDs, then subset that to get the empty one. 

1732 # Repeat the above tests starting from that. 

1733 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1734 self.checkQueryResults(dataIds, [dataId]) 

1735 self.checkQueryResults( 

1736 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1737 [dataset1, dataset2], 

1738 ) 

1739 self.checkQueryResults( 

1740 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1741 [dataset1], 

1742 ) 

1743 self.checkQueryResults( 

1744 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1745 [dataset2], 

1746 ) 

1747 with dataIds.materialize() as dataIds: 

1748 self.checkQueryResults(dataIds, [dataId]) 

1749 self.checkQueryResults( 

1750 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1751 [dataset1, dataset2], 

1752 ) 

1753 self.checkQueryResults( 

1754 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1755 [dataset1], 

1756 ) 

1757 self.checkQueryResults( 

1758 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1759 [dataset2], 

1760 ) 

1761 # Query for non-empty data IDs, then materialize, then subset to get 

1762 # the empty one. Repeat again. 

1763 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1764 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1765 self.checkQueryResults(dataIds, [dataId]) 

1766 self.checkQueryResults( 

1767 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1768 [dataset1, dataset2], 

1769 ) 

1770 self.checkQueryResults( 

1771 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1772 [dataset1], 

1773 ) 

1774 self.checkQueryResults( 

1775 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1776 [dataset2], 

1777 ) 

1778 with dataIds.materialize() as dataIds: 

1779 self.checkQueryResults(dataIds, [dataId]) 

1780 self.checkQueryResults( 

1781 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1782 [dataset1, dataset2], 

1783 ) 

1784 self.checkQueryResults( 

1785 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1786 [dataset1], 

1787 ) 

1788 self.checkQueryResults( 

1789 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1790 [dataset2], 

1791 ) 

1792 # Query for non-empty data IDs with a constraint on an empty-data-ID 

1793 # dataset that exists. 

1794 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...) 

1795 self.checkQueryResults( 

1796 dataIds.subset(unique=True), 

1797 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)], 

1798 ) 

1799 # Again query for non-empty data IDs with a constraint on empty-data-ID 

1800 # datasets, but when the datasets don't exist. We delete the existing 

1801 # dataset and query just that collection rather than creating a new 

1802 # empty collection because this is a bit less likely for our build-time 

1803 # logic to shortcut-out (via the collection summaries), and such a 

1804 # shortcut would make this test a bit more trivial than we'd like. 

1805 registry.removeDatasets([dataset2]) 

1806 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2) 

1807 self.checkQueryResults(dataIds, []) 

1808 

1809 def testDimensionDataModifications(self): 

1810 """Test that modifying dimension records via: 

1811 syncDimensionData(..., update=True) and 

1812 insertDimensionData(..., replace=True) works as expected, even in the 

1813 presence of datasets using those dimensions and spatial overlap 

1814 relationships. 

1815 """ 

1816 

1817 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1818 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1819 for begin, end in ranges: 

1820 yield from range(begin, end) 

1821 

1822 def range_set_hull( 

1823 ranges: lsst.sphgeom.RangeSet, 

1824 pixelization: lsst.sphgeom.HtmPixelization, 

1825 ) -> lsst.sphgeom.ConvexPolygon: 

1826 """Create a ConvexPolygon hull of the region defined by a set of 

1827 HTM pixelization index ranges. 

1828 """ 

1829 points = [] 

1830 for index in unpack_range_set(ranges): 

1831 points.extend(pixelization.triangle(index).getVertices()) 

1832 return lsst.sphgeom.ConvexPolygon(points) 

1833 

1834 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1835 # and four child regions (the trixels within the parent at the next 

1836 # level). We'll use the parent as a tract/visit region and the children 

1837 # as its patch/visit_detector regions. 

1838 registry = self.makeRegistry() 

1839 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1840 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1841 index = 12288 

1842 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 

1843 assert htm6.universe().contains(child_ranges_small) 

1844 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)] 

1845 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1846 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1847 ) 

1848 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1849 # Make a larger version of each child region, defined to be the set of 

1850 # htm6 trixels that overlap the original's bounding circle. Make a new 

1851 # parent that's the convex hull of the new children. 

1852 child_regions_large = [ 

1853 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1854 ] 

1855 assert all( 

1856 large.contains(small) 

1857 for large, small in zip(child_regions_large, child_regions_small, strict=True) 

1858 ) 

1859 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1860 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1861 ) 

1862 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1863 assert parent_region_large.contains(parent_region_small) 

1864 assert not parent_region_small.contains(parent_region_large) 

1865 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1866 # Find some commonSkyPix indices that overlap the large regions but do 

1867 # not overlap the small regions. We use commonSkyPix here to make sure the 

1868 # real tests later involve what's in the database, not just post-query 

1869 # filtering of regions. 

1870 child_difference_indices = [] 

1871 for large, small in zip(child_regions_large, child_regions_small, strict=True): 

1872 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1873 assert difference, "if this is empty, we can't test anything useful with these regions" 

1874 assert all( 

1875 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1876 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1877 for d in difference 

1878 ) 

1879 child_difference_indices.append(difference) 

1880 parent_difference_indices = list( 

1881 unpack_range_set( 

1882 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1883 ) 

1884 ) 

1885 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1886 assert all( 

1887 ( 

1888 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1889 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1890 ) 

1891 for d in parent_difference_indices 

1892 ) 

1893 # Now that we've finally got those regions, we'll insert the large ones 

1894 # as tract/patch dimension records. 

1895 skymap_name = "testing_v1" 

1896 registry.insertDimensionData( 

1897 "skymap", 

1898 { 

1899 "name": skymap_name, 

1900 "hash": bytes([42]), 

1901 "tract_max": 1, 

1902 "patch_nx_max": 2, 

1903 "patch_ny_max": 2, 

1904 }, 

1905 ) 

1906 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

1907 registry.insertDimensionData( 

1908 "patch", 

1909 *[ 

1910 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1911 for n, c in enumerate(child_regions_large) 

1912 ], 

1913 ) 

1914 # Add a dataset that uses these dimensions to make sure that modifying 

1915 # them doesn't disrupt foreign keys (need to make sure DB doesn't 

1916 # implement insert with replace=True as delete-then-insert). 

1917 dataset_type = DatasetType( 

1918 "coadd", 

1919 dimensions=["tract", "patch"], 

1920 universe=registry.dimensions, 

1921 storageClass="Exposure", 

1922 ) 

1923 registry.registerDatasetType(dataset_type) 

1924 registry.registerCollection("the_run", CollectionType.RUN) 

1925 registry.insertDatasets( 

1926 dataset_type, 

1927 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

1928 run="the_run", 

1929 ) 

1930 # Query for tracts and patches that overlap some "difference" 

1931 # commonSkyPix pixels; there should be overlaps, because the database has 

1932 # the "large" suite of regions. 

1933 self.assertEqual( 

1934 {0}, 

1935 { 

1936 data_id["tract"] 

1937 for data_id in registry.queryDataIds( 

1938 ["tract"], 

1939 skymap=skymap_name, 

1940 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1941 ) 

1942 }, 

1943 ) 

1944 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1945 self.assertIn( 

1946 patch_id, 

1947 { 

1948 data_id["patch"] 

1949 for data_id in registry.queryDataIds( 

1950 ["patch"], 

1951 skymap=skymap_name, 

1952 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1953 ) 

1954 }, 

1955 ) 

1956 # Use sync to update the tract region and insert to update the regions 

1957 # of the patches, to the "small" suite. 

1958 updated = registry.syncDimensionData( 

1959 "tract", 

1960 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

1961 update=True, 

1962 ) 

1963 self.assertEqual(updated, {"region": parent_region_large}) 

1964 registry.insertDimensionData( 

1965 "patch", 

1966 *[ 

1967 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1968 for n, c in enumerate(child_regions_small) 

1969 ], 

1970 replace=True, 

1971 ) 

1972 # Query again; there now should be no such overlaps, because the 

1973 # database has the "small" suite of regions. 

1974 self.assertFalse( 

1975 set( 

1976 registry.queryDataIds( 

1977 ["tract"], 

1978 skymap=skymap_name, 

1979 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1980 ) 

1981 ) 

1982 ) 

1983 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1984 self.assertNotIn( 

1985 patch_id, 

1986 { 

1987 data_id["patch"] 

1988 for data_id in registry.queryDataIds( 

1989 ["patch"], 

1990 skymap=skymap_name, 

1991 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1992 ) 

1993 }, 

1994 ) 

1995 # Update back to the large regions and query one more time. 

1996 updated = registry.syncDimensionData( 

1997 "tract", 

1998 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

1999 update=True, 

2000 ) 

2001 self.assertEqual(updated, {"region": parent_region_small}) 

2002 registry.insertDimensionData( 

2003 "patch", 

2004 *[ 

2005 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

2006 for n, c in enumerate(child_regions_large) 

2007 ], 

2008 replace=True, 

2009 ) 

2010 self.assertEqual( 

2011 {0}, 

2012 { 

2013 data_id["tract"] 

2014 for data_id in registry.queryDataIds( 

2015 ["tract"], 

2016 skymap=skymap_name, 

2017 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

2018 ) 

2019 }, 

2020 ) 

2021 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

2022 self.assertIn( 

2023 patch_id, 

2024 { 

2025 data_id["patch"] 

2026 for data_id in registry.queryDataIds( 

2027 ["patch"], 

2028 skymap=skymap_name, 

2029 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

2030 ) 

2031 }, 

2032 ) 

2033 
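# Added note (grounded only in the assertions above): when an update
# actually occurs, syncDimensionData(..., update=True) returns a dict
# mapping each changed field name to its *previous* value, e.g.
# {"region": parent_region_large} after shrinking the tract region.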

2034 def testCalibrationCollections(self): 

2035 """Test operations on `~CollectionType.CALIBRATION` collections, 

2036 including `SqlRegistry.certify`, `SqlRegistry.decertify`, 

2037 `SqlRegistry.findDataset`, and 

2038 `DataCoordinateQueryResults.findRelatedDatasets`. 

2039 """ 

2040 # Setup - make a Registry, fill it with some datasets in 

2041 # non-calibration collections. 

2042 registry = self.makeRegistry() 

2043 self.loadData(registry, "base.yaml") 

2044 self.loadData(registry, "datasets.yaml") 

2045 # Set up some timestamps. 

2046 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

2047 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

2048 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

2049 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

2050 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

2051 allTimespans = [ 

2052 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

2053 ] 

2054 # Insert some exposure records with timespans between each sequential 

2055 # pair of those. 

2056 registry.insertDimensionData( 

2057 "exposure", 

2058 { 

2059 "instrument": "Cam1", 

2060 "id": 0, 

2061 "obs_id": "zero", 

2062 "physical_filter": "Cam1-G", 

2063 "timespan": Timespan(t1, t2), 

2064 }, 

2065 { 

2066 "instrument": "Cam1", 

2067 "id": 1, 

2068 "obs_id": "one", 

2069 "physical_filter": "Cam1-G", 

2070 "timespan": Timespan(t2, t3), 

2071 }, 

2072 { 

2073 "instrument": "Cam1", 

2074 "id": 2, 

2075 "obs_id": "two", 

2076 "physical_filter": "Cam1-G", 

2077 "timespan": Timespan(t3, t4), 

2078 }, 

2079 { 

2080 "instrument": "Cam1", 

2081 "id": 3, 

2082 "obs_id": "three", 

2083 "physical_filter": "Cam1-G", 

2084 "timespan": Timespan(t4, t5), 

2085 }, 

2086 ) 

2087 # Get references to some datasets. 

2088 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

2089 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

2090 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

2091 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

2092 # Register the main calibration collection we'll be working with. 

2093 collection = "Cam1/calibs/default" 

2094 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

2095 # Cannot associate into a calibration collection (no timespan). 

2096 with self.assertRaises(CollectionTypeError): 

2097 registry.associate(collection, [bias2a]) 

2098 # Certify 2a dataset with [t2, t4) validity. 

2099 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

2100 # Test that we can query for this dataset via the new collection, both 

2101 # on its own and with a RUN collection. 

2102 self.assertEqual( 

2103 set(registry.queryDatasets("bias", findFirst=False, collections=collection)), 

2104 {bias2a}, 

2105 ) 

2106 self.assertEqual( 

2107 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])), 

2108 { 

2109 bias2a, 

2110 bias2b, 

2111 bias3b, 

2112 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2113 }, 

2114 ) 

2115 self.assertEqual( 

2116 set(registry.queryDataIds("detector", datasets="bias", collections=collection)), 

2117 {registry.expandDataId(instrument="Cam1", detector=2)}, 

2118 ) 

2119 self.assertEqual( 

2120 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])), 

2121 { 

2122 registry.expandDataId(instrument="Cam1", detector=2), 

2123 registry.expandDataId(instrument="Cam1", detector=3), 

2124 registry.expandDataId(instrument="Cam1", detector=4), 

2125 }, 

2126 ) 

2127 self.assertEqual( 

2128 set( 

2129 registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets( 

2130 "bias", findFirst=True, collections=[collection] 

2131 ) 

2132 ), 

2133 { 

2134 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a), 

2135 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a), 

2136 }, 

2137 ) 

2138 self.assertEqual( 

2139 set( 

2140 registry.queryDataIds( 

2141 ["exposure", "detector"], instrument="Cam1", detector=2 

2142 ).findRelatedDatasets("bias", findFirst=True, collections=[collection, "imported_r"]) 

2143 ), 

2144 { 

2145 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a), 

2146 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a), 

2147 (registry.expandDataId(instrument="Cam1", detector=2, exposure=0), bias2b), 

2148 (registry.expandDataId(instrument="Cam1", detector=2, exposure=3), bias2b), 

2149 }, 

2150 ) 

2151 

2152 # We should not be able to certify 2b with anything overlapping that 

2153 # window. 

2154 with self.assertRaises(ConflictingDefinitionError): 

2155 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

2156 with self.assertRaises(ConflictingDefinitionError): 

2157 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

2158 with self.assertRaises(ConflictingDefinitionError): 

2159 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

2160 with self.assertRaises(ConflictingDefinitionError): 

2161 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

2162 with self.assertRaises(ConflictingDefinitionError): 

2163 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

2164 with self.assertRaises(ConflictingDefinitionError): 

2165 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

2166 with self.assertRaises(ConflictingDefinitionError): 

2167 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

2168 with self.assertRaises(ConflictingDefinitionError): 

2169 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

2170 # We should be able to certify 3a with a range overlapping that window, 

2171 # because it's for a different detector. 

2172 # We'll certify 3a over [t1, t3). 

2173 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

2174 # Now we'll certify 2b and 3b together over [t4, ∞). 

2175 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

2176 

2177 # Fetch all associations and check that they are what we expect. 

2178 self.assertCountEqual( 

2179 list( 

2180 registry.queryDatasetAssociations( 

2181 "bias", 

2182 collections=[collection, "imported_g", "imported_r"], 

2183 ) 

2184 ), 

2185 [ 

2186 DatasetAssociation( 

2187 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

2188 collection="imported_g", 

2189 timespan=None, 

2190 ), 

2191 DatasetAssociation( 

2192 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2193 collection="imported_r", 

2194 timespan=None, 

2195 ), 

2196 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

2197 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

2198 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

2199 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

2200 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

2201 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

2202 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2203 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2204 ], 

2205 ) 

2206 

2207 class Ambiguous: 

2208 """Tag class to denote lookups that should be ambiguous.""" 

2209 

2210 pass 

2211 

2212 def assertLookup( 

2213 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None 

2214 ) -> None: 

2215 """Local function that asserts that a bias lookup returns the given 

2216 expected result. 

2217 """ 

2218 if expected is Ambiguous: 

2219 with self.assertRaises((DatasetTypeError, LookupError)): 

2220 registry.findDataset( 

2221 "bias", 

2222 collections=collection, 

2223 instrument="Cam1", 

2224 detector=detector, 

2225 timespan=timespan, 

2226 ) 

2227 else: 

2228 self.assertEqual( 

2229 expected, 

2230 registry.findDataset( 

2231 "bias", 

2232 collections=collection, 

2233 instrument="Cam1", 

2234 detector=detector, 

2235 timespan=timespan, 

2236 ), 

2237 ) 

2238 

2239 # Systematically test lookups against expected results. 

2240 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2241 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2242 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2243 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2244 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

2245 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2246 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2247 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2248 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2249 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

2250 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2251 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2252 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2253 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

2254 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2255 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

2256 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

2257 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

2258 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

2259 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2260 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2261 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2262 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2263 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2264 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2265 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

2266 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2267 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2268 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2269 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2270 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

2271 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2272 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2273 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2274 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2275 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2276 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2277 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2278 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2279 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2280 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2281 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2282 

2283 # Test lookups via temporal joins to exposures. 

2284 self.assertEqual( 

2285 set( 

2286 registry.queryDataIds( 

2287 ["exposure", "detector"], instrument="Cam1", detector=2 

2288 ).findRelatedDatasets("bias", collections=[collection]) 

2289 ), 

2290 { 

2291 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a), 

2292 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a), 

2293 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b), 

2294 }, 

2295 ) 

2296 self.assertEqual( 

2297 set( 

2298 registry.queryDataIds( 

2299 ["exposure", "detector"], instrument="Cam1", detector=3 

2300 ).findRelatedDatasets("bias", collections=[collection]) 

2301 ), 

2302 { 

2303 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a), 

2304 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a), 

2305 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b), 

2306 }, 

2307 ) 

2308 self.assertEqual( 

2309 set( 

2310 registry.queryDataIds( 

2311 ["exposure", "detector"], instrument="Cam1", detector=2 

2312 ).findRelatedDatasets("bias", collections=[collection, "imported_g"]) 

2313 ), 

2314 { 

2315 (registry.expandDataId(instrument="Cam1", exposure=0, detector=2), bias2a), 

2316 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a), 

2317 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a), 

2318 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b), 

2319 }, 

2320 ) 

2321 self.assertEqual( 

2322 set( 

2323 registry.queryDataIds( 

2324 ["exposure", "detector"], instrument="Cam1", detector=3 

2325 ).findRelatedDatasets("bias", collections=[collection, "imported_g"]) 

2326 ), 

2327 { 

2328 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a), 

2329 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a), 

2330 (registry.expandDataId(instrument="Cam1", exposure=2, detector=3), bias3a), 

2331 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b), 

2332 }, 

2333 ) 

2334 

2335 # Decertify [t3, t5) for all data IDs, and do test lookups again. 

2336 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2337 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2338 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2339 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2340 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2341 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2342 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2343 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2344 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2345 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2346 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2347 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2348 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2349 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2350 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2351 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2352 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2353 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2354 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2355 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2356 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2357 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2358 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2359 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2360 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2361 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2362 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2363 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2364 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2365 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2366 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2367 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2368 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2369 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2370 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2371 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2372 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2373 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2374 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2375 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2376 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2377 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2378 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2379 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2380 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2381 

2382 # Decertify everything, this time with explicit data IDs, then check 

2383 # that no lookups succeed. 

2384 registry.decertify( 

2385 collection, 

2386 "bias", 

2387 Timespan(None, None), 

2388 dataIds=[ 

2389 dict(instrument="Cam1", detector=2), 

2390 dict(instrument="Cam1", detector=3), 

2391 ], 

2392 ) 

2393 for detector in (2, 3): 

2394 for timespan in allTimespans: 

2395 assertLookup(detector=detector, timespan=timespan, expected=None) 

2396 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2397 # those. 

2398 registry.certify( 

2399 collection, 

2400 [bias2a, bias3a], 

2401 Timespan(None, None), 

2402 ) 

2403 for timespan in allTimespans: 

2404 assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2405 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2406 # Decertify just bias2 over [t2, t4). 

2407 # This should split a single certification row into two (and leave the 

2408 # other existing row, for bias3a, alone). 

2409 registry.decertify( 

2410 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2411 ) 

2412 for timespan in allTimespans: 

2413 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2414 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2415 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2416 if overlapsBefore and overlapsAfter: 

2417 expected = Ambiguous 

2418 elif overlapsBefore or overlapsAfter: 

2419 expected = bias2a 

2420 else: 

2421 expected = None 

2422 assertLookup(detector=2, timespan=timespan, expected=expected) 

2423 
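# Added sketch (assumption: a toy model with float instants, not the
# registry's SQL implementation) of the row split that decertifying
# [t2, t4) performs on a certification valid over (-inf, inf), as the
# last block above verifies:
#
#     import math
#
#     def decertify_range(valid, removed):
#         """valid/removed are (begin, end) pairs; None means unbounded."""
#         lo = lambda t: -math.inf if t is None else t
#         hi = lambda t: math.inf if t is None else t
#         pieces = []
#         if lo(valid[0]) < lo(removed[0]):   # surviving piece before removal
#             pieces.append((valid[0], removed[0]))
#         if hi(valid[1]) > hi(removed[1]):   # surviving piece after removal
#             pieces.append((removed[1], valid[1]))
#         return pieces
#
#     # decertify_range((None, None), (2.0, 4.0)) == [(None, 2.0), (4.0, None)]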

2424 def testSkipCalibs(self): 

2425 """Test how queries handle skipping of calibration collections.""" 

2426 registry = self.makeRegistry() 

2427 self.loadData(registry, "base.yaml") 

2428 self.loadData(registry, "datasets.yaml") 

2429 

2430 coll_calib = "Cam1/calibs/default" 

2431 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2432 

2433 # Add all biases to the calibration collection. 

2434 # Without this, the logic that prunes dataset subqueries based on 

2435 # datasetType-collection summary information will fire before the logic 

2436 # we want to test below. This is a good thing (it avoids the dreaded 

2437 # NotImplementedError a bit more often) everywhere but here. 

2438 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2439 

2440 coll_list = [coll_calib, "imported_g", "imported_r"] 

2441 chain = "Cam1/chain" 

2442 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2443 registry.setCollectionChain(chain, coll_list) 

2444 

2445 # explicit list will raise if findFirst=True or there are temporal 

2446 # dimensions 

2447 with self.assertRaises(NotImplementedError): 

2448 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2449 with self.assertRaises(NotImplementedError): 

2450 registry.queryDataIds( 

2451 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2452 ).count() 

2453 

2454 # chain will skip 

2455 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2456 self.assertGreater(len(datasets), 0) 

2457 

2458 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2459 self.assertGreater(len(dataIds), 0) 

2460 

2461 # glob will skip too 

2462 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2463 self.assertGreater(len(datasets), 0) 

2464 

2465 # regular expression will skip too 

2466 pattern = re.compile(".*") 

2467 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2468 self.assertGreater(len(datasets), 0) 

2469 

2470 # ellipsis should work as usual 

2471 datasets = list(registry.queryDatasets("bias", collections=...)) 

2472 self.assertGreater(len(datasets), 0) 

2473 

2474 # few tests with findFirst 

2475 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2476 self.assertGreater(len(datasets), 0) 
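
# Illustrative sketch (not part of the original file): client code that
# must search an explicit collection list possibly containing calibration
# collections can guard against the NotImplementedError exercised above.
# The fallback shown is a hedged suggestion, not established behavior:
#
#     try:
#         refs = list(registry.queryDatasets("bias", collections=coll_list, findFirst=True))
#     except NotImplementedError:
#         # Calibration collections cannot yet be searched in find-first
#         # order; fall back to an unordered (findFirst=False) search.
#         refs = list(registry.queryDatasets("bias", collections=coll_list))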

2477 

2478 def testIngestTimeQuery(self): 

2479 registry = self.makeRegistry() 

2480 self.loadData(registry, "base.yaml") 

2481 dt0 = datetime.utcnow() 

2482 self.loadData(registry, "datasets.yaml") 

2483 dt1 = datetime.utcnow() 

2484 

2485 datasets = list(registry.queryDatasets(..., collections=...)) 

2486 len0 = len(datasets) 

2487 self.assertGreater(len0, 0) 

2488 

2489 where = "ingest_date > T'2000-01-01'" 

2490 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2491 len1 = len(datasets) 

2492 self.assertEqual(len0, len1) 

2493 

2494 # no one will ever use this piece of software in 30 years 

2495 where = "ingest_date > T'2050-01-01'" 

2496 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2497 len2 = len(datasets) 

2498 self.assertEqual(len2, 0) 

2499 

2500 # Check more exact timing to make sure there is no 37-second offset 

2501 # (after fixing DM-30124). SQLite time precision is 1 second, so make 

2502 # sure that we don't test with higher precision. 

2503 tests = [ 

2504 # format: (timestamp, operator, expected_len) 

2505 (dt0 - timedelta(seconds=1), ">", len0), 

2506 (dt0 - timedelta(seconds=1), "<", 0), 

2507 (dt1 + timedelta(seconds=1), "<", len0), 

2508 (dt1 + timedelta(seconds=1), ">", 0), 

2509 ] 

2510 for dt, op, expect_len in tests: 

2511 dt_str = dt.isoformat(sep=" ") 

2512 

2513 where = f"ingest_date {op} T'{dt_str}'" 

2514 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2515 self.assertEqual(len(datasets), expect_len) 

2516 

2517 # same with bind using datetime or astropy Time 

2518 where = f"ingest_date {op} ingest_time" 

2519 datasets = list( 

2520 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2521 ) 

2522 self.assertEqual(len(datasets), expect_len) 

2523 

2524 dt_astropy = astropy.time.Time(dt, format="datetime") 

2525 datasets = list( 

2526 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2527 ) 

2528 self.assertEqual(len(datasets), expect_len) 
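
# Illustrative sketch (not part of the original file): the two equivalent
# ways of getting a time into a query expression, as exercised above; the
# 'cutoff' name is arbitrary. A time can be inlined as a T'...' literal or
# supplied via bind:
cutoff = datetime(2024, 1, 1)
where_literal = f"ingest_date > T'{cutoff.isoformat(sep=' ')}'"
where_bound = "ingest_date > cutoff"
# Both select the same datasets:
#     registry.queryDatasets(..., collections=..., where=where_literal)
#     registry.queryDatasets(..., collections=..., where=where_bound, bind={"cutoff": cutoff})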

2529 

2530 def testTimespanQueries(self): 

2531 """Test query expressions involving timespans.""" 

2532 registry = self.makeRegistry() 

2533 self.loadData(registry, "hsc-rc2-subset.yaml") 

2534 # All visits in the database; mapping from ID to timespan. 

2535 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2536 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2537 # visit IDs are monotonically increasing). 

2538 ids = sorted(visits.keys()) 

2539 self.assertGreater(len(ids), 20) 

2540 # Pick some quasi-random indexes into `ids` to play with. 

2541 i1 = int(len(ids) * 0.1) 

2542 i2 = int(len(ids) * 0.3) 

2543 i3 = int(len(ids) * 0.6) 

2544 i4 = int(len(ids) * 0.8) 

2545 # Extract some times from those: just before the beginning of i1 (which 

2546 # should be after the end of the previous visit), exactly the 

2547 # beginning of i2, just after the beginning of i3 (and before its end), 

2548 # and the exact end of i4. 

2549 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2550 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2551 t2 = visits[ids[i2]].begin 

2552 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2553 self.assertLess(t3, visits[ids[i3]].end) 

2554 t4 = visits[ids[i4]].end 

2555 # Make sure those are actually in order. 

2556 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2557 

2558 bind = { 

2559 "t1": t1, 

2560 "t2": t2, 

2561 "t3": t3, 

2562 "t4": t4, 

2563 "ts23": Timespan(t2, t3), 

2564 } 

2565 

2566 def query(where): 

2567 """Return results as a sorted, deduplicated list of visit IDs.""" 

2568 return sorted( 

2569 { 

2570 dataId["visit"] 

2571 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2572 } 

2573 ) 

2574 

2575 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2576 # where they appear in the expression, and how we get the timespan into 

2577 # the expression. 

2578 

2579 # t1 is before the start of i1, so this should not include i1. 

2580 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2581 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2582 # should not include i2. 

2583 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2584 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2585 # t3 is in the middle of i3, so this should include i3. 

2586 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2587 # This one should not include i3 by the same reasoning. 

2588 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2589 # t4 is exactly at the end of i4, so this should include i4. 

2590 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2591 # i4's upper bound of t4 is exclusive, so this should not include i4. 

2592 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2593 

2594 # Now some timespan vs. time scalar queries. 

2595 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2596 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2597 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2598 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2599 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2600 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2601 

2602 # Empty timespans should not overlap anything. 

2603 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 
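
# Illustrative sketch (not part of the original file): the exclusive-end
# behavior relied on above follows from Timespan being a half-open
# interval [begin, end), which can be demonstrated standalone:
t_a = astropy.time.Time("2020-01-01T00:00:00", scale="tai")
t_b = astropy.time.Time("2020-01-01T01:00:00", scale="tai")
t_c = astropy.time.Time("2020-01-01T02:00:00", scale="tai")
assert not Timespan(t_a, t_b).overlaps(Timespan(t_b, t_c))  # [a, b) and [b, c) are disjoint
assert Timespan(t_a, t_c).overlaps(Timespan(t_b, t_c))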

2604 

2605 def testCollectionSummaries(self): 

2606 """Test recording and retrieval of collection summaries.""" 

2607 self.maxDiff = None 

2608 registry = self.makeRegistry() 

2609 # Importing datasets from yaml should go through the code path where 

2610 # we update collection summaries as we insert datasets. 

2611 self.loadData(registry, "base.yaml") 

2612 self.loadData(registry, "datasets.yaml") 

2613 flat = registry.getDatasetType("flat") 

2614 expected1 = CollectionSummary() 

2615 expected1.dataset_types.add(registry.getDatasetType("bias")) 

2616 expected1.add_data_ids( 

2617 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)] 

2618 ) 

2619 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2620 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2621 # Create a chained collection with both of the imported runs; the 

2622 # summary should be the same, because it's a union with itself. 

2623 chain = "chain" 

2624 registry.registerCollection(chain, CollectionType.CHAINED) 

2625 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2626 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2627 # Associate flats only into a tagged collection and a calibration 

2628 # collection to check summaries of those. 

2629 tag = "tag" 

2630 registry.registerCollection(tag, CollectionType.TAGGED) 

2631 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2632 calibs = "calibs" 

2633 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2634 registry.certify( 

2635 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2636 ) 

2637 expected2 = expected1.copy() 

2638 expected2.dataset_types.discard("bias") 

2639 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2640 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2641 # Explicitly calling SqlRegistry.refresh() should load those same 

2642 # summaries, via a totally different code path. 

2643 registry.refresh() 

2644 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2645 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2646 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2647 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 
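
# Illustrative sketch (not part of the original file): because a chained
# collection's summary is the union of its children's summaries, a cheap
# way to ask "could this chain contain a dataset type" without querying
# datasets is (assuming NamedValueSet.names as in daf_butler):
#
#     summary = registry.getCollectionSummary(chain)
#     might_have_flats = "flat" in summary.dataset_types.names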

2648 

2649 def testBindInQueryDatasets(self): 

2650 """Test that the bind parameter is correctly forwarded in 

2651 queryDatasets recursion. 

2652 """ 

2653 registry = self.makeRegistry() 

2654 # Importing datasets from yaml should go through the code path where 

2655 # we update collection summaries as we insert datasets. 

2656 self.loadData(registry, "base.yaml") 

2657 self.loadData(registry, "datasets.yaml") 

2658 self.assertEqual( 

2659 set(registry.queryDatasets("flat", band="r", collections=...)), 

2660 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2661 ) 

2662 

2663 def testQueryIntRangeExpressions(self): 

2664 """Test integer range expressions in ``where`` arguments. 

2665 

2666 Note that our expressions use inclusive stop values, unlike Python's. 

2667 """ 

2668 registry = self.makeRegistry() 

2669 self.loadData(registry, "base.yaml") 

2670 self.assertEqual( 

2671 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")), 

2672 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]}, 

2673 ) 

2674 self.assertEqual( 

2675 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")), 

2676 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]}, 

2677 ) 

2678 self.assertEqual( 

2679 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")), 

2680 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]}, 

2681 ) 
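
# Illustrative sketch (not part of the original file): the
# (start..stop:stride) syntax is inclusive of stop, unlike Python's
# range(). A hypothetical helper expressing the mapping:
def _expression_range(start: int, stop: int, stride: int = 1) -> range:
    # 'detector IN (start..stop:stride)' matches exactly these values.
    return range(start, stop + 1, stride)
assert list(_expression_range(1, 4, 2)) == [1, 3]
assert list(_expression_range(2, 4, 2)) == [2, 4]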

2682 

2683 def testQueryResultSummaries(self): 

2684 """Test summary methods like `count`, `any`, and `explain_no_results` 

2685 on `DataCoordinateQueryResults` and `DatasetQueryResults`. 

2686 """ 

2687 registry = self.makeRegistry() 

2688 self.loadData(registry, "base.yaml") 

2689 self.loadData(registry, "datasets.yaml") 

2690 self.loadData(registry, "spatial.yaml") 

2691 # Default test dataset has two collections, each with both flats and 

2692 # biases. Add a new collection with only biases. 

2693 registry.registerCollection("biases", CollectionType.TAGGED) 

2694 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2695 # First query yields two results, and involves no postprocessing. 

2696 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2697 self.assertTrue(query1.any(execute=False, exact=False)) 

2698 self.assertTrue(query1.any(execute=True, exact=False)) 

2699 self.assertTrue(query1.any(execute=True, exact=True)) 

2700 self.assertEqual(query1.count(exact=False), 2) 

2701 self.assertEqual(query1.count(exact=True), 2) 

2702 self.assertFalse(list(query1.explain_no_results())) 

2703 # Second query should yield no results, which we should see when 

2704 # we attempt to expand the data ID. 

2705 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2706 # There's no execute=False, exact=False test here because the behavior is 

2707 # not something we want to guarantee in this case (and exact=False 

2708 # says either answer is legal). 

2709 self.assertFalse(query2.any(execute=True, exact=False)) 

2710 self.assertFalse(query2.any(execute=True, exact=True)) 

2711 self.assertEqual(query2.count(exact=False), 0) 

2712 self.assertEqual(query2.count(exact=True), 0) 

2713 self.assertTrue(list(query2.explain_no_results())) 

2714 # These queries yield no results due to various problems that can be 

2715 # spotted prior to execution, yielding helpful diagnostics. 

2716 base_query = registry.queryDataIds(["detector", "physical_filter"]) 

2717 queries_and_snippets = [ 

2718 ( 

2719 # Dataset type name doesn't match any existing dataset types. 

2720 registry.queryDatasets("nonexistent", collections=...), 

2721 ["nonexistent"], 

2722 ), 

2723 ( 

2724 # Dataset type object isn't registered. 

2725 registry.queryDatasets( 

2726 DatasetType( 

2727 "nonexistent", 

2728 dimensions=["instrument"], 

2729 universe=registry.dimensions, 

2730 storageClass="Image", 

2731 ), 

2732 collections=..., 

2733 ), 

2734 ["nonexistent"], 

2735 ), 

2736 ( 

2737 # No datasets of this type in this collection. 

2738 registry.queryDatasets("flat", collections=["biases"]), 

2739 ["flat", "biases"], 

2740 ), 

2741 ( 

2742 # No datasets of this type in this collection. 

2743 base_query.findDatasets("flat", collections=["biases"]), 

2744 ["flat", "biases"], 

2745 ), 

2746 ( 

2747 # No collections matching at all. 

2748 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2749 ["potato"], 

2750 ), 

2751 ] 

2752 # The behavior of these additional queries is slated to change in the 

2753 # future, so we also check for deprecation warnings. 

2754 with self.assertWarns(FutureWarning): 

2755 queries_and_snippets.append( 

2756 ( 

2757 # Dataset type name doesn't match any existing dataset 

2758 # types. 

2759 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...), 

2760 ["nonexistent"], 

2761 ) 

2762 ) 

2763 with self.assertWarns(FutureWarning): 

2764 queries_and_snippets.append( 

2765 ( 

2766 # Dataset type name doesn't match any existing dataset 

2767 # types. 

2768 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...), 

2769 ["nonexistent"], 

2770 ) 

2771 ) 

2772 for query, snippets in queries_and_snippets: 

2773 self.assertFalse(query.any(execute=False, exact=False)) 

2774 self.assertFalse(query.any(execute=True, exact=False)) 

2775 self.assertFalse(query.any(execute=True, exact=True)) 

2776 self.assertEqual(query.count(exact=False), 0) 

2777 self.assertEqual(query.count(exact=True), 0) 

2778 messages = list(query.explain_no_results()) 

2779 self.assertTrue(messages) 

2780 # Want all expected snippets to appear in at least one message. 

2781 self.assertTrue( 

2782 any( 

2783 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2784 ), 

2785 messages, 

2786 ) 

2787 

2788 # This query does yield results, but should also emit a warning because 

2789 # passing dataset type patterns to queryDataIds is deprecated; just look for 

2790 # the warning. 

2791 with self.assertWarns(FutureWarning): 

2792 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...) 

2793 

2794 # These queries yield no results due to problems that can be identified 

2795 # by cheap follow-up queries, yielding helpful diagnostics. 

2796 for query, snippets in [ 

2797 ( 

2798 # No records for one of the involved dimensions. 

2799 registry.queryDataIds(["subfilter"]), 

2800 ["no rows", "subfilter"], 

2801 ), 

2802 ( 

2803 # No records for one of the involved dimensions. 

2804 registry.queryDimensionRecords("subfilter"), 

2805 ["no rows", "subfilter"], 

2806 ), 

2807 ]: 

2808 self.assertFalse(query.any(execute=True, exact=False)) 

2809 self.assertFalse(query.any(execute=True, exact=True)) 

2810 self.assertEqual(query.count(exact=True), 0) 

2811 messages = list(query.explain_no_results()) 

2812 self.assertTrue(messages) 

2813 # Want all expected snippets to appear in at least one message. 

2814 self.assertTrue( 

2815 any( 

2816 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2817 ), 

2818 messages, 

2819 ) 

2820 

2821 # This query yields four overlaps in the database, but one is filtered 

2822 # out in postprocessing. The count queries aren't accurate because 

2823 # they don't account for duplication that happens due to an internal 

2824 # join against commonSkyPix. 

2825 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2826 self.assertEqual( 

2827 { 

2828 DataCoordinate.standardize( 

2829 instrument="Cam1", 

2830 skymap="SkyMap1", 

2831 visit=v, 

2832 tract=t, 

2833 universe=registry.dimensions, 

2834 ) 

2835 for v, t in [(1, 0), (2, 0), (2, 1)] 

2836 }, 

2837 set(query3), 

2838 ) 

2839 self.assertTrue(query3.any(execute=False, exact=False)) 

2840 self.assertTrue(query3.any(execute=True, exact=False)) 

2841 self.assertTrue(query3.any(execute=True, exact=True)) 

2842 self.assertGreaterEqual(query3.count(exact=False), 4) 

2843 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3) 

2844 self.assertFalse(list(query3.explain_no_results())) 

2845 # This query yields overlaps in the database, but all are filtered 

2846 # out in postprocessing. The count queries again aren't very useful. 

2847 # We have to use `where=` here to avoid an optimization that 

2848 # (currently) skips the spatial postprocess-filtering because it 

2849 # recognizes that no spatial join is necessary. That's not ideal, but 

2850 # fixing it is out of scope for this ticket. 

2851 query4 = registry.queryDataIds( 

2852 ["visit", "tract"], 

2853 instrument="Cam1", 

2854 skymap="SkyMap1", 

2855 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2856 ) 

2857 self.assertFalse(set(query4)) 

2858 self.assertTrue(query4.any(execute=False, exact=False)) 

2859 self.assertTrue(query4.any(execute=True, exact=False)) 

2860 self.assertFalse(query4.any(execute=True, exact=True)) 

2861 self.assertGreaterEqual(query4.count(exact=False), 1) 

2862 self.assertEqual(query4.count(exact=True, discard=True), 0) 

2863 messages = query4.explain_no_results() 

2864 self.assertTrue(messages) 

2865 self.assertTrue(any("overlap" in message for message in messages)) 

2866 # This query should yield results from one dataset type but not the 

2867 # other, which is not registered. 

2868 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"]) 

2869 self.assertTrue(set(query5)) 

2870 self.assertTrue(query5.any(execute=False, exact=False)) 

2871 self.assertTrue(query5.any(execute=True, exact=False)) 

2872 self.assertTrue(query5.any(execute=True, exact=True)) 

2873 self.assertGreaterEqual(query5.count(exact=False), 1) 

2874 self.assertGreaterEqual(query5.count(exact=True), 1) 

2875 self.assertFalse(list(query5.explain_no_results())) 

2876 # This query applies a selection that yields no results, fully in the 

2877 # database. Explaining why it fails involves traversing the relation 

2878 # tree and running a LIMIT 1 query at each level that has the potential 

2879 # to remove rows. 

2880 query6 = registry.queryDimensionRecords( 

2881 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1" 

2882 ) 

2883 self.assertEqual(query6.count(exact=True), 0) 

2884 messages = query6.explain_no_results() 

2885 self.assertTrue(messages) 

2886 self.assertTrue(any("no-purpose" in message for message in messages)) 

2887 

2888 def testQueryDataIdsExpressionError(self): 

2889 """Test error checking of 'where' expressions in queryDataIds.""" 

2890 registry = self.makeRegistry() 

2891 self.loadData(registry, "base.yaml") 

2892 bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")} 

2893 with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."): 

2894 registry.queryDataIds(["detector"], where="foo.bar = 12") 

2895 with self.assertRaisesRegex( 

2896 LookupError, "Dimension element name cannot be inferred in this context." 

2897 ): 

2898 registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind) 

2899 

2900 def testQueryDataIdsOrderBy(self): 

2901 """Test order_by and limit on result returned by queryDataIds().""" 

2902 registry = self.makeRegistry() 

2903 self.loadData(registry, "base.yaml") 

2904 self.loadData(registry, "datasets.yaml") 

2905 self.loadData(registry, "spatial.yaml") 

2906 

2907 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

2908 return registry.queryDataIds( 

2909 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

2910 ) 

2911 

2912 Test = namedtuple( 

2913 "testQueryDataIdsOrderByTest", 

2914 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

2915 defaults=(None, None, None), 

2916 ) 

2917 

2918 test_data = ( 

2919 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2920 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

2921 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

2922 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

2923 Test( 

2924 "tract.id,visit.id", 

2925 "tract,visit", 

2926 ((0, 1), (0, 1), (0, 2)), 

2927 limit=(3,), 

2928 ), 

2929 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

2930 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

2931 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

2932 Test( 

2933 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

2934 ), 

2935 Test( 

2936 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

2937 ), 

2938 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2939 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2940 Test( 

2941 "tract,-timespan.begin,timespan.end", 

2942 "tract,visit", 

2943 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

2944 ), 

2945 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

2946 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

2947 Test( 

2948 "tract,detector", 

2949 "tract,detector", 

2950 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2951 datasets="flat", 

2952 collections="imported_r", 

2953 ), 

2954 Test( 

2955 "tract,detector.full_name", 

2956 "tract,detector", 

2957 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2958 datasets="flat", 

2959 collections="imported_r", 

2960 ), 

2961 Test( 

2962 "tract,detector.raft,detector.name_in_raft", 

2963 "tract,detector", 

2964 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2965 datasets="flat", 

2966 collections="imported_r", 

2967 ), 

2968 ) 

2969 

2970 for test in test_data: 

2971 order_by = test.order_by.split(",") 

2972 keys = test.keys.split(",") 

2973 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

2974 if test.limit is not None: 

2975 query = query.limit(*test.limit) 

2976 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

2977 self.assertEqual(dataIds, test.result) 

2978 

2979 # and materialize 

2980 query = do_query(keys).order_by(*order_by) 

2981 if test.limit is not None: 

2982 query = query.limit(*test.limit) 

2983 with self.assertRaises(RelationalAlgebraError): 

2984 with query.materialize(): 

2985 pass 

2986 

2987 # errors in a name 

2988 for order_by in ("", "-"): 

2989 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2990 list(do_query().order_by(order_by)) 

2991 

2992 for order_by in ("undimension.name", "-undimension.name"): 

2993 with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"): 

2994 list(do_query().order_by(order_by)) 

2995 

2996 for order_by in ("attract", "-attract"): 

2997 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

2998 list(do_query().order_by(order_by)) 

2999 

3000 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

3001 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

3002 

3003 with self.assertRaisesRegex( 

3004 ValueError, 

3005 r"Timespan exists in more than one dimension element \(exposure, visit\); " 

3006 r"qualify timespan with specific dimension name\.", 

3007 ): 

3008 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

3009 

3010 with self.assertRaisesRegex( 

3011 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

3012 ): 

3013 list(do_query("tract").order_by("timespan.begin")) 

3014 

3015 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

3016 list(do_query("tract").order_by("tract.timespan.begin")) 

3017 

3018 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

3019 list(do_query("tract").order_by("tract.name")) 

3020 

3021 with self.assertRaisesRegex( 

3022 ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?" 

3023 ): 

3024 list(do_query("visit").order_by("timestamp.begin")) 

3025 

3026 def testQueryDataIdsGovernorExceptions(self): 

3027 """Test exceptions raised by queryDataIds() for incorrect governors.""" 

3028 registry = self.makeRegistry() 

3029 self.loadData(registry, "base.yaml") 

3030 self.loadData(registry, "datasets.yaml") 

3031 self.loadData(registry, "spatial.yaml") 

3032 

3033 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs): 

3034 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs) 

3035 

3036 Test = namedtuple( 

3037 "testQueryDataIdExceptionsTest", 

3038 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"), 

3039 defaults=(None, None, None, {}, None, 0), 

3040 ) 

3041 

3042 test_data = ( 

3043 Test("tract,visit", count=6), 

3044 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

3045 Test( 

3046 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError 

3047 ), 

3048 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

3049 Test( 

3050 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError 

3051 ), 

3052 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6), 

3053 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError), 

3054 Test( 

3055 "tract,visit", 

3056 where="instrument=cam AND skymap=map", 

3057 bind={"cam": "Cam1", "map": "SkyMap1"}, 

3058 count=6, 

3059 ), 

3060 Test( 

3061 "tract,visit", 

3062 where="instrument=cam AND skymap=map", 

3063 bind={"cam": "Cam", "map": "SkyMap"}, 

3064 exception=DataIdValueError, 

3065 ), 

3066 ) 

3067 

3068 for test in test_data: 

3069 dimensions = test.dimensions.split(",") 

3070 if test.exception: 

3071 with self.assertRaises(test.exception): 

3072 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() 

3073 else: 

3074 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3075 self.assertEqual(query.count(discard=True), test.count) 

3076 

3077 # and materialize 

3078 if test.exception: 

3079 with self.assertRaises(test.exception): 

3080 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3081 with query.materialize() as materialized: 

3082 materialized.count(discard=True) 

3083 else: 

3084 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3085 with query.materialize() as materialized: 

3086 self.assertEqual(materialized.count(discard=True), test.count) 

3087 

3088 def testQueryDimensionRecordsOrderBy(self): 

3089 """Test order_by and limit on result returned by 

3090 queryDimensionRecords(). 

3091 """ 

3092 registry = self.makeRegistry() 

3093 self.loadData(registry, "base.yaml") 

3094 self.loadData(registry, "datasets.yaml") 

3095 self.loadData(registry, "spatial.yaml") 

3096 

3097 def do_query(element, datasets=None, collections=None): 

3098 return registry.queryDimensionRecords( 

3099 element, instrument="Cam1", datasets=datasets, collections=collections 

3100 ) 

3101 

3102 query = do_query("detector") 

3103 self.assertEqual(len(list(query)), 4) 

3104 

3105 Test = namedtuple( 

3106 "testQueryDataIdsOrderByTest", 

3107 ("element", "order_by", "result", "limit", "datasets", "collections"), 

3108 defaults=(None, None, None), 

3109 ) 

3110 

3111 test_data = ( 

3112 Test("detector", "detector", (1, 2, 3, 4)), 

3113 Test("detector", "-detector", (4, 3, 2, 1)), 

3114 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)), 

3115 Test("detector", "-detector.purpose", (4,), limit=(1,)), 

3116 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)), 

3117 Test("visit", "visit", (1, 2)), 

3118 Test("visit", "-visit.id", (2, 1)), 

3119 Test("visit", "zenith_angle", (1, 2)), 

3120 Test("visit", "-visit.name", (2, 1)), 

3121 Test("visit", "day_obs,-timespan.begin", (2, 1)), 

3122 ) 

3123 

3124 for test in test_data: 

3125 order_by = test.order_by.split(",") 

3126 query = do_query(test.element).order_by(*order_by) 

3127 if test.limit is not None: 

3128 query = query.limit(*test.limit) 

3129 dataIds = tuple(rec.id for rec in query) 

3130 self.assertEqual(dataIds, test.result) 

3131 

3132 # errors in a name 

3133 for order_by in ("", "-"): 

3134 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

3135 list(do_query("detector").order_by(order_by)) 

3136 

3137 for order_by in ("undimension.name", "-undimension.name"): 

3138 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"): 

3139 list(do_query("detector").order_by(order_by)) 

3140 

3141 for order_by in ("attract", "-attract"): 

3142 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."): 

3143 list(do_query("detector").order_by(order_by)) 

3144 

3145 for order_by in ("timestamp.begin", "-timestamp.begin"): 

3146 with self.assertRaisesRegex( 

3147 ValueError, 

3148 r"Element name mismatch: 'timestamp' instead of 'visit'; " 

3149 r"perhaps you meant 'timespan.begin'\?", 

3150 ): 

3151 list(do_query("visit").order_by(order_by)) 

3152 

3153 def testQueryDimensionRecordsExceptions(self): 

3154 """Test exceptions raised by queryDimensionRecords().""" 

3155 registry = self.makeRegistry() 

3156 self.loadData(registry, "base.yaml") 

3157 self.loadData(registry, "datasets.yaml") 

3158 self.loadData(registry, "spatial.yaml") 

3159 

3160 result = registry.queryDimensionRecords("detector") 

3161 self.assertEqual(result.count(), 4) 

3162 result = registry.queryDimensionRecords("detector", instrument="Cam1") 

3163 self.assertEqual(result.count(), 4) 

3164 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"}) 

3165 self.assertEqual(result.count(), 4) 

3166 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'") 

3167 self.assertEqual(result.count(), 4) 

3168 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"}) 

3169 self.assertEqual(result.count(), 4) 

3170 

3171 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3172 result = registry.queryDimensionRecords("detector", instrument="NotCam1") 

3173 result.count() 

3174 

3175 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3176 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) 

3177 result.count() 

3178 

3179 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3180 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") 

3181 result.count() 

3182 

3183 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3184 result = registry.queryDimensionRecords( 

3185 "detector", where="instrument=instr", bind={"instr": "NotCam1"} 

3186 ) 

3187 result.count() 

3188 

3189 def testDatasetConstrainedDimensionRecordQueries(self): 

3190 """Test that queryDimensionRecords works even when given a dataset 

3191 constraint whose dimensions extend beyond the requested dimension 

3192 element's. 

3193 """ 

3194 registry = self.makeRegistry() 

3195 self.loadData(registry, "base.yaml") 

3196 self.loadData(registry, "datasets.yaml") 

3197 # Query for physical_filter dimension records, using a dataset that 

3198 # has both physical_filter and detector dimensions. 

3199 records = registry.queryDimensionRecords( 

3200 "physical_filter", 

3201 datasets=["flat"], 

3202 collections="imported_r", 

3203 ) 

3204 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"}) 

3205 # Trying to constrain by all dataset types is an error. 

3206 with self.assertRaises(TypeError): 

3207 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r")) 

3208 

3209 def testSkyPixDatasetQueries(self): 

3210 """Test that we can build queries involving skypix dimensions as long 

3211 as a dataset type that uses those dimensions is included. 

3212 """ 

3213 registry = self.makeRegistry() 

3214 self.loadData(registry, "base.yaml") 

3215 dataset_type = DatasetType( 

3216 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int" 

3217 ) 

3218 registry.registerDatasetType(dataset_type) 

3219 run = "r" 

3220 registry.registerRun(run) 

3221 # First try queries where there are no datasets; the concern is whether 

3222 # we can even build and execute these queries without raising, even 

3223 # when "doomed" query shortcuts are in play. 

3224 self.assertFalse( 

3225 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)) 

3226 ) 

3227 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run))) 

3228 # Now add a dataset and see that we can get it back. 

3229 htm7 = registry.dimensions.skypix["htm"][7].pixelization 

3230 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0]) 

3231 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run) 

3232 self.assertEqual( 

3233 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)), 

3234 {data_id}, 

3235 ) 

3236 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref}) 

3237 

3238 def testDatasetIdFactory(self): 

3239 """Simple test for DatasetIdFactory, mostly to catch potential changes 

3240 in its API. 

3241 """ 

3242 registry = self.makeRegistry() 

3243 factory = DatasetIdFactory() 

3244 dataset_type = DatasetType( 

3245 "datasetType", 

3246 dimensions=["detector", "instrument"], 

3247 universe=registry.dimensions, 

3248 storageClass="int", 

3249 ) 

3250 run = "run" 

3251 data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions) 

3252 

3253 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE) 

3254 self.assertIsInstance(datasetId, uuid.UUID) 

3255 self.assertEqual(datasetId.version, 4) 

3256 

3257 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE) 

3258 self.assertIsInstance(datasetId, uuid.UUID) 

3259 self.assertEqual(datasetId.version, 5) 

3260 

3261 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN) 

3262 self.assertIsInstance(datasetId, uuid.UUID) 

3263 self.assertEqual(datasetId.version, 5) 
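
# Illustrative sketch (not part of the original file): the version-5 modes
# are deterministic in their inputs, while UNIQUE (version 4) is random,
# so reproducibility can be checked directly:
repeat = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
assert repeat == datasetId  # same inputs reproduce the same UUID
assert factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE) != datasetId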

3264 

3265 def testExposureQueries(self): 

3266 """Test query methods using arguments sourced from the exposure log 

3267 service. 

3268 

3269 The most complete test dataset currently available to daf_butler tests 

3270 is the hsc-rc2-subset.yaml export (which is unfortunately distinct from 

3271 the lsst/rc2_subset GitHub repo), but that does not have 'exposure' 

3272 dimension records as it was focused on providing nontrivial spatial 

3273 overlaps between visit+detector and tract+patch. So in this test we 

3274 need to translate queries that originally used the exposure dimension 

3275 to use the (very similar) visit dimension instead. 

3276 """ 

3277 registry = self.makeRegistry() 

3278 self.loadData(registry, "hsc-rc2-subset.yaml") 

3279 self.assertEqual( 

3280 [ 

3281 record.id 

3282 for record in registry.queryDimensionRecords("visit", instrument="HSC") 

3283 .order_by("id") 

3284 .limit(5) 

3285 ], 

3286 [318, 322, 326, 330, 332], 

3287 ) 

3288 self.assertEqual( 

3289 [ 

3290 data_id["visit"] 

3291 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("id").limit(5) 

3292 ], 

3293 [318, 322, 326, 330, 332], 

3294 ) 

3295 self.assertEqual( 

3296 [ 

3297 record.id 

3298 for record in registry.queryDimensionRecords("detector", instrument="HSC") 

3299 .order_by("full_name") 

3300 .limit(5) 

3301 ], 

3302 [73, 72, 71, 70, 65], 

3303 ) 

3304 self.assertEqual( 

3305 [ 

3306 data_id["detector"] 

3307 for data_id in registry.queryDataIds(["detector"], instrument="HSC") 

3308 .order_by("full_name") 

3309 .limit(5) 

3310 ], 

3311 [73, 72, 71, 70, 65], 

3312 ) 

3313 

3314 def test_long_query_names(self) -> None: 

3315 """Test that queries involving very long names are handled correctly. 

3316 

3317 This is especially important for PostgreSQL, which truncates symbols 

3318 longer than 63 characters, but it's worth testing for all DBs. 

3319 """ 

3320 registry = self.makeRegistry() 

3321 name = "abcd" * 17 

3322 registry.registerDatasetType( 

3323 DatasetType( 

3324 name, 

3325 dimensions=(), 

3326 storageClass="Exposure", 

3327 universe=registry.dimensions, 

3328 ) 

3329 ) 

3330 # Need to search more than one collection actually containing a 

3331 # matching dataset to avoid optimizations that sidestep bugs due to 

3332 # truncation by making findFirst=True a no-op. 

3333 run1 = "run1" 

3334 registry.registerRun(run1) 

3335 run2 = "run2" 

3336 registry.registerRun(run2) 

3337 (ref1,) = registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run1) 

3338 registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run2) 

3339 self.assertEqual( 

3340 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)), 

3341 {ref1}, 

3342 ) 

3343 

3344 def test_skypix_constraint_queries(self) -> None: 

3345 """Test queries spatially constrained by a skypix data ID.""" 

3346 registry = self.makeRegistry() 

3347 self.loadData(registry, "hsc-rc2-subset.yaml") 

3348 patch_regions = { 

3349 (data_id["tract"], data_id["patch"]): data_id.region 

3350 for data_id in registry.queryDataIds(["patch"]).expanded() 

3351 } 

3352 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"] 

3353 # This check ensures the test doesn't become trivial due to a config 

3354 # change; if it does, just pick a different HTM level. 

3355 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix) 

3356 # Gather all skypix IDs that definitely overlap at least one of these 

3357 # patches. 

3358 relevant_skypix_ids = lsst.sphgeom.RangeSet() 

3359 for patch_region in patch_regions.values(): 

3360 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region) 

3361 # Look for a "nontrivial" skypix_id that overlaps at least one patch 

3362 # and does not overlap at least one other patch. 

3363 for skypix_id in itertools.chain.from_iterable( 

3364 range(begin, end) for begin, end in relevant_skypix_ids 

3365 ): 

3366 skypix_region = skypix_dimension.pixelization.pixel(skypix_id) 

3367 overlapping_patches = { 

3368 patch_key 

3369 for patch_key, patch_region in patch_regions.items() 

3370 if not patch_region.isDisjointFrom(skypix_region) 

3371 } 

3372 if overlapping_patches and overlapping_patches != patch_regions.keys(): 

3373 break 

3374 else: 

3375 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.") 

3376 self.assertEqual( 

3377 { 

3378 (data_id["tract"], data_id["patch"]) 

3379 for data_id in registry.queryDataIds( 

3380 ["patch"], 

3381 dataId={skypix_dimension.name: skypix_id}, 

3382 ) 

3383 }, 

3384 overlapping_patches, 

3385 ) 

3386 # Test that a three-way join that includes the common skypix system in 

3387 # the dimensions doesn't generate redundant join terms in the query. 

3388 full_data_ids = set( 

3389 registry.queryDataIds( 

3390 ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC" 

3391 ).expanded() 

3392 ) 

3393 self.assertGreater(len(full_data_ids), 0) 

3394 for data_id in full_data_ids: 

3395 self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region)) 

3396 self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region)) 

3397 

3398 def test_spatial_constraint_queries(self) -> None: 

3399 """Test queries in which one spatial dimension in the constraint (data 

3400 ID or ``where`` string) constrains a different spatial dimension in the 

3401 query result columns. 

3402 """ 

3403 registry = self.makeRegistry() 

3404 self.loadData(registry, "hsc-rc2-subset.yaml") 

3405 patch_regions = { 

3406 (data_id["tract"], data_id["patch"]): data_id.region 

3407 for data_id in registry.queryDataIds(["patch"]).expanded() 

3408 } 

3409 observation_regions = { 

3410 (data_id["visit"], data_id["detector"]): data_id.region 

3411 for data_id in registry.queryDataIds(["visit", "detector"]).expanded() 

3412 } 

3413 all_combos = { 

3414 (patch_key, observation_key) 

3415 for patch_key, observation_key in itertools.product(patch_regions, observation_regions) 

3416 } 

3417 overlapping_combos = { 

3418 (patch_key, observation_key) 

3419 for patch_key, observation_key in all_combos 

3420 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key]) 

3421 } 

3422 # Check a direct spatial join with no constraint first. 

3423 self.assertEqual( 

3424 { 

3425 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"])) 

3426 for data_id in registry.queryDataIds(["patch", "visit", "detector"]) 

3427 }, 

3428 overlapping_combos, 

3429 ) 

3430 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set) 

3431 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set) 

3432 for patch_key, observation_key in overlapping_combos: 

3433 overlaps_by_patch[patch_key].add(observation_key) 

3434 overlaps_by_observation[observation_key].add(patch_key) 

3435 # Find patches and observations that overlap at least one item of the 

3436 # other kind, but not all of them. 

3437 nontrivial_patch = next( 

3438 iter( 

3439 patch_key 

3440 for patch_key, observation_keys in overlaps_by_patch.items() 

3441 if observation_keys and observation_keys != observation_regions.keys() 

3442 ) 

3443 ) 

3444 nontrivial_observation = next( 

3445 iter( 

3446 observation_key 

3447 for observation_key, patch_keys in overlaps_by_observation.items() 

3448 if patch_keys and patch_keys != patch_regions.keys() 

3449 ) 

3450 ) 

3451 # Use the nontrivial patches and observations as constraints on the 

3452 # other dimensions in various ways, first via a 'where' expression. 

3453 # It's better in general to use 'bind' instead of f-strings, but these 

3454 # are all integers so there are no quoting concerns. 

3455 self.assertEqual( 

3456 { 

3457 (data_id["visit"], data_id["detector"]) 

3458 for data_id in registry.queryDataIds( 

3459 ["visit", "detector"], 

3460 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}", 

3461 skymap="hsc_rings_v1", 

3462 ) 

3463 }, 

3464 overlaps_by_patch[nontrivial_patch], 

3465 ) 

3466 self.assertEqual( 

3467 { 

3468 (data_id["tract"], data_id["patch"]) 

3469 for data_id in registry.queryDataIds( 

3470 ["patch"], 

3471 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}", 

3472 instrument="HSC", 

3473 ) 

3474 }, 

3475 overlaps_by_observation[nontrivial_observation], 

3476 ) 

3477 # and then via the dataId argument. 

3478 self.assertEqual( 

3479 { 

3480 (data_id["visit"], data_id["detector"]) 

3481 for data_id in registry.queryDataIds( 

3482 ["visit", "detector"], 

3483 dataId={ 

3484 "tract": nontrivial_patch[0], 

3485 "patch": nontrivial_patch[1], 

3486 }, 

3487 skymap="hsc_rings_v1", 

3488 ) 

3489 }, 

3490 overlaps_by_patch[nontrivial_patch], 

3491 ) 

3492 self.assertEqual( 

3493 { 

3494 (data_id["tract"], data_id["patch"]) 

3495 for data_id in registry.queryDataIds( 

3496 ["patch"], 

3497 dataId={ 

3498 "visit": nontrivial_observation[0], 

3499 "detector": nontrivial_observation[1], 

3500 }, 

3501 instrument="HSC", 

3502 ) 

3503 }, 

3504 overlaps_by_observation[nontrivial_observation], 

3505 ) 

3506 

3507 def test_query_projection_drop_postprocessing(self) -> None: 

3508 """Test that projections and deduplications on query objects can 

3509 drop post-query region filtering to ensure the query remains in 

3510 the SQL engine. 

3511 """ 

3512 registry = self.makeRegistry() 

3513 self.loadData(registry, "base.yaml") 

3514 self.loadData(registry, "spatial.yaml") 

3515 

3516 def pop_transfer(tree: Relation) -> Relation: 

3517 """If a relation tree terminates with a transfer to a new engine, 

3518 return the relation prior to that transfer. If not, return the 

3519 original relation. 

3520 """ 

3521 match tree: 

3522 case Transfer(target=target): 

3523 return target 

3524 case _: 

3525 return tree 

3526 

3527 # There's no public way to get a Query object yet, so we get one from a 

3528 # DataCoordinateQueryResults private attribute. When a public API is 

3529 # available this test should use it. 

3530 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query 

3531 # We expect this query to terminate in the iteration engine originally, 

3532 # because region-filtering is necessary. 

3533 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine) 

3534 # If we deduplicate, we usually have to do that downstream of the 

3535 # filtering. That means the deduplication has to happen in the 

3536 # iteration engine. 

3537 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine) 

3538 # If we pass drop_postprocessing, we instead drop the region filtering 

3539 # so the deduplication can happen in SQL (though there might still be 

3540 # transfer to iteration at the tail of the tree that we can ignore; 

3541 # that's what the pop_transfer takes care of here). 

3542 self.assertIsInstance( 

3543 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine, 

3544 sql.Engine, 

3545 ) 
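
# Illustrative sketch (not part of the original file): a slightly more
# general form of pop_transfer (hypothetical, assuming Transfer exposes
# 'target' as matched above) that peels any number of trailing transfers
# to find the engine that actually produced the rows:
#
#     def terminal_engine(tree: Relation):
#         while isinstance(tree, Transfer):
#             tree = tree.target
#         return tree.engine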

3546 

3547 def test_query_find_datasets_drop_postprocessing(self) -> None: 

3548 """Test that DataCoordinateQueryResults.findDatasets avoids commutator 

3549 problems with the FindFirstDataset relation operation. 

3550 """ 

3551 # Setup: load some visit, tract, and patch records, and insert two 

3552 # datasets with dimensions {visit, patch}, with one in each of two 

3553 # RUN collections. 

3554 registry = self.makeRegistry() 

3555 self.loadData(registry, "base.yaml") 

3556 self.loadData(registry, "spatial.yaml") 

3557 storage_class = StorageClass("Warpy") 

3558 registry.storageClasses.registerStorageClass(storage_class) 

3559 dataset_type = DatasetType( 

3560 "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions 

3561 ) 

3562 registry.registerDatasetType(dataset_type) 

3563 (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1) 

3564 registry.registerRun("run1") 

3565 registry.registerRun("run2") 

3566 (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1") 

3567 (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2") 

3568 # Query for the dataset using queryDataIds(...).findDatasets(...) 

3569 # against only one of the two collections. This should work even 

3570 # though the relation returned by queryDataIds ends with 

3571 # iteration-engine region-filtering, because we can recognize before 

3572 # running the query that there is only one collecton to search and 

3573 # hence the (default) findFirst=True is irrelevant, and joining in the 

3574 # dataset query commutes past the iteration-engine postprocessing. 

3575 query1 = registry.queryDataIds( 

3576 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"] 

3577 ) 

3578 self.assertEqual( 

3579 set(query1.findDatasets(dataset_type.name, collections=["run1"])), 

3580 {ref1}, 

3581 ) 

3582 # Query for the dataset using queryDataIds(...).findDatasets(...) 

3583 # against both collections. This can only work if the FindFirstDataset 

3584 # operation can be commuted past the iteration-engine options into SQL. 

3585 query2 = registry.queryDataIds( 

3586 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"] 

3587 ) 

3588 self.assertEqual( 

3589 set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])), 

3590 {ref2}, 

3591 ) 

3592 

3593 def test_query_empty_collections(self) -> None: 

3594 """Test for registry query methods with empty collections. The methods 

3595 should return an empty result set (or None when applicable) and provide 

3596 "doomed" diagnostics. 

3597 """ 

3598 registry = self.makeRegistry() 

3599 self.loadData(registry, "base.yaml") 

3600 self.loadData(registry, "datasets.yaml") 

3601 

3602 # Tests for registry.findDataset() 

3603 with self.assertRaises(NoDefaultCollectionError): 

3604 registry.findDataset("bias", instrument="Cam1", detector=1) 

3605 self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...)) 

3606 self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[])) 

3607 

3608 # Tests for registry.queryDatasets() 

3609 with self.assertRaises(NoDefaultCollectionError): 

3610 registry.queryDatasets("bias") 

3611 self.assertTrue(list(registry.queryDatasets("bias", collections=...))) 

3612 

3613 result = registry.queryDatasets("bias", collections=[]) 

3614 self.assertEqual(len(list(result)), 0) 

3615 messages = list(result.explain_no_results()) 

3616 self.assertTrue(messages) 

3617 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3618 

3619 # Tests for registry.queryDataIds() 

3620 with self.assertRaises(NoDefaultCollectionError): 

3621 registry.queryDataIds("detector", datasets="bias") 

3622 self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...))) 

3623 

3624 result = registry.queryDataIds("detector", datasets="bias", collections=[]) 

3625 self.assertEqual(len(list(result)), 0) 

3626 messages = list(result.explain_no_results()) 

3627 self.assertTrue(messages) 

3628 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3629 

3630 # Tests for registry.queryDimensionRecords() 

3631 with self.assertRaises(NoDefaultCollectionError): 

3632 registry.queryDimensionRecords("detector", datasets="bias") 

3633 self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...))) 

3634 

3635 result = registry.queryDimensionRecords("detector", datasets="bias", collections=[]) 

3636 self.assertEqual(len(list(result)), 0) 

3637 messages = list(result.explain_no_results()) 

3638 self.assertTrue(messages) 

3639 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3640 

3641 def test_dataset_followup_spatial_joins(self) -> None: 

3642 """Test queryDataIds(...).findRelatedDatasets(...) where a spatial join 

3643 is involved. 

3644 """ 

3645 registry = self.makeRegistry() 

3646 self.loadData(registry, "base.yaml") 

3647 self.loadData(registry, "spatial.yaml") 

3648 pvi_dataset_type = DatasetType( 

3649 "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions 

3650 ) 

3651 registry.registerDatasetType(pvi_dataset_type) 

3652 collection = "datasets" 

3653 registry.registerRun(collection) 

3654 (pvi1,) = registry.insertDatasets( 

3655 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 1}], run=collection 

3656 ) 

3657 (pvi2,) = registry.insertDatasets( 

3658 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 2}], run=collection 

3659 ) 

3660 (pvi3,) = registry.insertDatasets( 

3661 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 3}], run=collection 

3662 ) 

3663 self.assertEqual( 

3664 set( 

3665 registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0) 

3666 .expanded() 

3667 .findRelatedDatasets("pvi", [collection]) 

3668 ), 

3669 { 

3670 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi1), 

3671 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi2), 

3672 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=1), pvi2), 

3673 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi1), 

3674 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi2), 

3675 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi3), 

3676 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=3), pvi2), 

3677 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=4), pvi3), 

3678 }, 

3679 )
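
# Illustrative sketch (not part of the original file): unlike findDatasets,
# findRelatedDatasets yields (data ID, DatasetRef) pairs, so the same
# dataset may appear once per related data ID. Grouping inverts the
# relationship ('results' is a hypothetical name for the query above):
#
#     patches_by_dataset = defaultdict(set)
#     for data_id, ref in results:
#         patches_by_dataset[ref].add((data_id["tract"], data_id["patch"]))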