# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Iterator
from datetime import datetime, timedelta
from typing import TYPE_CHECKING

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetIdFactory,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    SkyPixDimension,
    StorageClass,
    Timespan,
    ddl,
)
from .._collection_summary import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: str | None = None
97 """Name of the collections manager class, if subclass provides value for 

98 this member then it overrides name specified in default configuration 

99 (`str`). 

100 """ 


    datasetsManager: str | dict[str, str] | None = None
103 """Name or configuration dictionary of the datasets manager class, if 

104 subclass provides value for this member then it overrides name specified 

105 in default configuration (`str` or `dict`). 

106 """ 


    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        Returned instance will be pre-configured based on the values of class
        members, and default-configured for all other parameters. Subclasses
        that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

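    # A minimal sketch of how a concrete subclass might tie the class members
    # above to makeRegistryConfig(); the manager class name, the SQLite URI,
    # and the Registry.createFromConfig call are illustrative assumptions,
    # not part of this module:
    #
    #     class SqliteRegistryTests(RegistryTests, unittest.TestCase):
    #         collectionsManager = (
    #             "lsst.daf.butler.registry.collections.synthIntKey.SynthIntKeyCollectionManager"
    #         )
    #
    #         def makeRegistry(self, share_repo_with=None):
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"  # in-memory database
    #             return Registry.createFromConfig(config)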

    @abstractmethod
    def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry | None:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with` is
            not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
170 """ 

171 self.assertCountEqual(list(results), expected) 

172 self.assertEqual(results.count(), len(expected)) 

173 if expected: 

174 self.assertTrue(results.any()) 

175 else: 

176 self.assertFalse(results.any()) 

177 

178 def testOpaque(self): 

179 """Tests for `Registry.registerOpaqueTable`, 

180 `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and 

181 `Registry.deleteOpaqueData`. 

182 """ 

183 registry = self.makeRegistry() 

184 table = "opaque_table_for_testing" 

185 registry.registerOpaqueTable( 

186 table, 

187 spec=ddl.TableSpec( 

188 fields=[ 

189 ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True), 

190 ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False), 

191 ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True), 

192 ], 

193 ), 

194 ) 

195 rows = [ 

196 {"id": 1, "name": "one", "count": None}, 

197 {"id": 2, "name": "two", "count": 5}, 

198 {"id": 3, "name": "three", "count": 6}, 

199 ] 

200 registry.insertOpaqueData(table, *rows) 

201 self.assertCountEqual(rows, list(registry.fetchOpaqueData(table))) 

202 self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1))) 

203 self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two"))) 

204 self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two")))) 

205 self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3)))) 

        # Test very long IN clause which exceeds sqlite limit on number of
        # parameters. SQLite says the limit is 32k but it looks like it is
        # much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than 1k batch size, first with
        # duplicates, second has matching elements in different batches (after
        # sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
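        # A rough sketch of the batching idea exercised above (the splitting
        # below is an illustrative assumption, not the registry's actual
        # implementation; only the ~1k batch size comes from the comment):
        #
        #     def batched(values, size=1000):
        #         for i in range(0, len(values), size):
        #             yield values[i : i + size]
        #
        # Each batch then becomes its own IN (...) clause, keeping every
        # statement under the database's parameter limit.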

        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the given
        keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, in which two have the right
        # dataset type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets-uuid.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
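        # In a CALIBRATION collection a dataset is looked up via its certified
        # validity range: the searches below that pass a timespan can match in
        # "Cam1/calib", while the later searches without one skip that
        # collection entirely.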

        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset ID."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing same DatasetRef with different dataset ID is an error
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs; they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make dataset ref with reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
                self.assertEqual(ref1.id, ref.id)

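                # For reference, a version-5 UUID is a name-based hash of a
                # namespace UUID and a name string, which is what makes these
                # IDs reproducible. A minimal illustration of the mechanism
                # (the namespace and name below are arbitrary, not the ones
                # DatasetIdFactory actually uses):
                #
                #     namespace = uuid.UUID(int=0)
                #     id1 = uuid.uuid5(namespace, "bias+Cam1+detector=1")
                #     id2 = uuid.uuid5(namespace, "bias+Cam1+detector=1")
                #     assert id1 == id2 and id1.version == 5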

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref])

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be included
        # when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "flat", "bias.wcs", "flat.photoCalib"},
                NamedValueSet(registry.queryDatasetTypes(components=True)).names,
            )
        # Use a pattern that can match either parent or components. Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
            )
        # This pattern matches only a component. In this case we also return
        # that component dataset type if components=None.
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=None)).names,
            )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
            )
        # Add a dataset type using a StorageClass that we'll then remove; check
        # that this does not affect our ability to query for dataset types
        # (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={
                "data1": registry.storageClasses.getStorageClass("StructuredDataDict"),
                "data2": registry.storageClasses.getStorageClass("StructuredDataDict"),
            },
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertWarns(FutureWarning):
            with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
                everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that. So if the next line
        # fails (i.e. "temporary.data" _is_ in everything.names), it means
        # this part of the test isn't doing anything, because the _unregister
        # call above isn't simulating the real-life case we want it to
        # simulate, in which different versions of daf_butler in entirely
        # different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp". This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        with self.assertWarns(FutureWarning):
            dataIds = registry.queryDataIds(
                ["detector"],
                datasets=["bias.wcs"],
                collections=collection,
            ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        with self.assertWarns(FutureWarning):
            childRefs2 = set(
                registry.queryDatasets(
                    "bias.wcs",
                    collections=collection,
                )
            )
        self.assertEqual({ref.datasetType for ref in childRefs2}, {childType})
        self.assertEqual({ref.dataId for ref in childRefs2}, set(dataIds))

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
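        # The effective search order of chain2 is now a depth-first
        # flattening of [run2, [tag1, run2]]: run2 first, then chain1's
        # children tag1 and run2.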

        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2. That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap.
        """
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
995 dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1), 

        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2;
                # 100 has different datasets in the different collections,
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, which is not in the dimensions but is
        # a part of the full expression, should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!"))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
1232 self.assertEqual(len(rows), 3 * 4 * 1) # 3 tracts x 4 patches x 1 filter

1233 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5)) 

1234 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7)) 

1235 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",)) 

1236 

1237 # Specifying a non-existent skymap raises an exception.

1238 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

1239 rows = registry.queryDataIds( 

1240 dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'" 

1241 ).toSet() 

1242 

1243 def testSpatialJoin(self): 

1244 """Test queries that involve spatial overlap joins.""" 

1245 registry = self.makeRegistry() 

1246 self.loadData(registry, "hsc-rc2-subset.yaml") 

1247 

1248 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of 

1249 # the TopologicalFamily they belong to. We'll relate all elements in 

1250 # each family to all of the elements in each other family. 

1251 families = defaultdict(set) 

1252 # Dictionary of {element.name: {dataId: region}}. 

1253 regions = {} 

1254 for element in registry.dimensions.getDatabaseElements(): 

1255 if element.spatial is not None: 

1256 families[element.spatial.name].add(element) 

1257 regions[element.name] = { 

1258 record.dataId: record.region for record in registry.queryDimensionRecords(element) 

1259 } 

1260 

1261 # If this check fails, it's not necessarily a problem - it may just be 

1262 # a reasonable change to the default dimension definitions - but the 

1263 # test below depends on there being more than one family to do anything 

1264 # useful. 

1265 self.assertEqual(len(families), 2) 

1266 

1267 # Overlap DatabaseDimensionElements with each other. 

1268 for family1, family2 in itertools.combinations(families, 2): 

1269 for element1, element2 in itertools.product(families[family1], families[family2]): 

1270 graph = DimensionGraph.union(element1.graph, element2.graph) 

1271 # Construct expected set of overlapping data IDs via a 

1272 # brute-force comparison of the regions we've already fetched. 

1273 expected = { 

1274 DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph) 

1275 for (dataId1, region1), (dataId2, region2) in itertools.product( 

1276 regions[element1.name].items(), regions[element2.name].items() 

1277 ) 

1278 if not region1.isDisjointFrom(region2) 

1279 } 

1280 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1281 queried = set(registry.queryDataIds(graph)) 

1282 self.assertEqual(expected, queried) 

1283 

1284 # Overlap each DatabaseDimensionElement with the commonSkyPix system. 

1285 commonSkyPix = registry.dimensions.commonSkyPix 

1286 for elementName, these_regions in regions.items(): 

1287 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph) 

1288 expected = set() 

1289 for dataId, region in these_regions.items(): 

1290 for begin, end in commonSkyPix.pixelization.envelope(region): 

1291 expected.update( 

1292 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph) 

1293 for index in range(begin, end) 

1294 ) 

1295 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1296 queried = set(registry.queryDataIds(graph)) 

1297 self.assertEqual(expected, queried) 

1298 

1299 def testAbstractQuery(self): 

1300 """Test that we can run a query that just lists the known 

1301 bands. This is tricky because band is 

1302 backed by a query against physical_filter. 

1303 """ 

1304 registry = self.makeRegistry() 

1305 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1306 registry.insertDimensionData( 

1307 "physical_filter", 

1308 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1309 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1310 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1311 ) 

1312 rows = registry.queryDataIds(["band"]).toSet() 

1313 self.assertCountEqual( 

1314 rows, 

1315 [ 

1316 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1317 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1318 ], 

1319 ) 

1320 
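# A minimal sketch, separate from the registry machinery, of what the
# "abstract" band query above reduces to: projecting the distinct band
# values out of the physical_filter records (values taken from the test
# data inserted above).
physical_filters = [
    {"name": "dummy_i", "band": "i"},
    {"name": "dummy_i2", "band": "i"},
    {"name": "dummy_r", "band": "r"},
]
assert sorted({rec["band"] for rec in physical_filters}) == ["i", "r"]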

1321 def testAttributeManager(self): 

1322 """Test basic functionality of attribute manager.""" 

1323 # Number of attribute records in a fresh database: 6 managers with

1324 # 2 schema-version records each, plus one dimensions-config record.

1325 VERSION_COUNT = 6 * 2 + 1 

1326 

1327 registry = self.makeRegistry() 

1328 attributes = registry._managers.attributes 

1329 

1330 # check what get() returns for non-existing key 

1331 self.assertIsNone(attributes.get("attr")) 

1332 self.assertEqual(attributes.get("attr", ""), "") 

1333 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1334 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1335 

1336 # cannot store empty key or value 

1337 with self.assertRaises(ValueError): 

1338 attributes.set("", "value") 

1339 with self.assertRaises(ValueError): 

1340 attributes.set("attr", "") 

1341 

1342 # set value of non-existing key 

1343 attributes.set("attr", "value") 

1344 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1345 self.assertEqual(attributes.get("attr"), "value") 

1346 

1347 # update value of existing key 

1348 with self.assertRaises(ButlerAttributeExistsError): 

1349 attributes.set("attr", "value2") 

1350 

1351 attributes.set("attr", "value2", force=True) 

1352 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1353 self.assertEqual(attributes.get("attr"), "value2") 

1354 

1355 # delete existing key 

1356 self.assertTrue(attributes.delete("attr")) 

1357 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1358 

1359 # delete non-existing key 

1360 self.assertFalse(attributes.delete("non-attr")) 

1361 

1362 # store bunch of keys and get the list back 

1363 data = [ 

1364 ("version.core", "1.2.3"), 

1365 ("version.dimensions", "3.2.1"), 

1366 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1367 ] 

1368 for key, value in data: 

1369 attributes.set(key, value) 

1370 items = dict(attributes.items()) 

1371 for key, value in data: 

1372 self.assertEqual(items[key], value) 

1373 

1374 def testQueryDatasetsDeduplication(self): 

1375 """Test that the findFirst option to queryDatasets selects datasets 

1376 from collections in the order given.

1377 """ 

1378 registry = self.makeRegistry() 

1379 self.loadData(registry, "base.yaml") 

1380 self.loadData(registry, "datasets.yaml") 

1381 self.assertCountEqual( 

1382 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1383 [ 

1384 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1385 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1386 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1387 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1388 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1389 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1390 ], 

1391 ) 

1392 self.assertCountEqual( 

1393 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1394 [ 

1395 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1396 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1397 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1398 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1399 ], 

1400 ) 

1401 self.assertCountEqual( 

1402 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1403 [ 

1404 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1405 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1406 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1407 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1408 ], 

1409 ) 

1410 
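# A minimal sketch of the find-first search order exercised above, kept
# separate from the registry implementation: for each data ID, keep the
# dataset from the earliest collection in the search order that has one.
# The collection names mirror the test data; the dataset labels are
# hypothetical.
def _find_first_sketch(collections, datasets_by_collection):
    # datasets_by_collection maps collection -> {data_id: dataset}.
    result = {}
    for collection in collections:
        for data_id, dataset in datasets_by_collection[collection].items():
            result.setdefault(data_id, dataset)
    return result

_biases = {
    "imported_g": {1: "bias1g", 2: "bias2g", 3: "bias3g"},
    "imported_r": {2: "bias2r", 3: "bias3r", 4: "bias4r"},
}
assert _find_first_sketch(["imported_g", "imported_r"], _biases) == {
    1: "bias1g", 2: "bias2g", 3: "bias3g", 4: "bias4r"
}
assert _find_first_sketch(["imported_r", "imported_g"], _biases) == {
    1: "bias1g", 2: "bias2r", 3: "bias3r", 4: "bias4r"
}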

1411 def testQueryResults(self): 

1412 """Test querying for data IDs and then manipulating the QueryResults 

1413 object returned to perform other queries. 

1414 """ 

1415 registry = self.makeRegistry() 

1416 self.loadData(registry, "base.yaml") 

1417 self.loadData(registry, "datasets.yaml") 

1418 bias = registry.getDatasetType("bias") 

1419 flat = registry.getDatasetType("flat") 

1420 # Obtain expected results from methods other than those we're testing 

1421 # here. That includes: 

1422 # - the dimensions of the data IDs we want to query: 

1423 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1424 # - the dimensions of some other data IDs we'll extract from that: 

1425 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1426 # - the data IDs we expect to obtain from the first queries: 

1427 expectedDataIds = DataCoordinateSet( 

1428 { 

1429 DataCoordinate.standardize( 

1430 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1431 ) 

1432 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1433 }, 

1434 graph=expectedGraph, 

1435 hasFull=False, 

1436 hasRecords=False, 

1437 ) 

1438 # - the flat datasets we expect to find from those data IDs, in just 

1439 # one collection (so deduplication is irrelevant): 

1440 expectedFlats = [ 

1441 registry.findDataset( 

1442 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1443 ), 

1444 registry.findDataset( 

1445 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1446 ), 

1447 registry.findDataset( 

1448 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1449 ), 

1450 ] 

1451 # - the data IDs we expect to extract from that: 

1452 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1453 # - the bias datasets we expect to find from those data IDs after we

1454 # subset out the physical_filter dimension, first with duplicates:

1455 expectedAllBiases = [ 

1456 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1457 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1458 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1459 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1460 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1461 ] 

1462 # - ...and without duplicates: 

1463 expectedDeduplicatedBiases = [ 

1464 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1465 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1466 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1467 ] 

1468 # Test against those expected results, using a "lazy" query for the 

1469 # data IDs (which re-executes that query each time we use it to do 

1470 # something new). 

1471 dataIds = registry.queryDataIds( 

1472 ["detector", "physical_filter"], 

1473 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1474 instrument="Cam1", 

1475 ) 

1476 self.assertEqual(dataIds.graph, expectedGraph) 

1477 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1478 self.assertCountEqual( 

1479 list( 

1480 dataIds.findDatasets( 

1481 flat, 

1482 collections=["imported_r"], 

1483 ) 

1484 ), 

1485 expectedFlats, 

1486 ) 

1487 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1488 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1489 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1490 self.assertCountEqual( 

1491 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1492 expectedAllBiases, 

1493 ) 

1494 self.assertCountEqual( 

1495 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1496 expectedDeduplicatedBiases, 

1497 ) 

1498 

1499 # Searching for a dataset with dimensions we had projected away 

1500 # restores those dimensions. 

1501 self.assertCountEqual( 

1502 list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)), 

1503 expectedFlats, 

1504 ) 

1505 

1506 # Use a component dataset type. 

1507 self.assertCountEqual( 

1508 [ 

1509 ref.makeComponentRef("image") 

1510 for ref in subsetDataIds.findDatasets( 

1511 bias, 

1512 collections=["imported_r", "imported_g"], 

1513 findFirst=False, 

1514 ) 

1515 ], 

1516 [ref.makeComponentRef("image") for ref in expectedAllBiases], 

1517 ) 

1518 

1519 # Use a named dataset type that does not exist and a dataset type 

1520 # object that does not exist. 

1521 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure") 

1522 

1523 # Test both string name and dataset type object. 

1524 test_type: str | DatasetType 

1525 for test_type, test_type_name in ( 

1526 (unknown_type, unknown_type.name), 

1527 (unknown_type.name, unknown_type.name), 

1528 ): 

1529 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name): 

1530 list( 

1531 subsetDataIds.findDatasets( 

1532 test_type, collections=["imported_r", "imported_g"], findFirst=True 

1533 ) 

1534 ) 

1535 

1536 # Materialize the bias dataset queries (only) by putting the results 

1537 # into temporary tables, then repeat those tests. 

1538 with subsetDataIds.findDatasets( 

1539 bias, collections=["imported_r", "imported_g"], findFirst=False 

1540 ).materialize() as biases: 

1541 self.assertCountEqual(list(biases), expectedAllBiases) 

1542 with subsetDataIds.findDatasets( 

1543 bias, collections=["imported_r", "imported_g"], findFirst=True 

1544 ).materialize() as biases: 

1545 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1546 # Materialize the data ID subset query, but not the dataset queries. 

1547 with subsetDataIds.materialize() as subsetDataIds: 

1548 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1549 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1550 self.assertCountEqual( 

1551 list( 

1552 subsetDataIds.findDatasets( 

1553 bias, collections=["imported_r", "imported_g"], findFirst=False 

1554 ) 

1555 ), 

1556 expectedAllBiases, 

1557 ) 

1558 self.assertCountEqual( 

1559 list( 

1560 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1561 ), 

1562 expectedDeduplicatedBiases, 

1563 ) 

1564 # Materialize the dataset queries, too. 

1565 with subsetDataIds.findDatasets( 

1566 bias, collections=["imported_r", "imported_g"], findFirst=False 

1567 ).materialize() as biases: 

1568 self.assertCountEqual(list(biases), expectedAllBiases) 

1569 with subsetDataIds.findDatasets( 

1570 bias, collections=["imported_r", "imported_g"], findFirst=True 

1571 ).materialize() as biases: 

1572 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1573 # Materialize the original query, but none of the follow-up queries. 

1574 with dataIds.materialize() as dataIds: 

1575 self.assertEqual(dataIds.graph, expectedGraph) 

1576 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1577 self.assertCountEqual( 

1578 list( 

1579 dataIds.findDatasets( 

1580 flat, 

1581 collections=["imported_r"], 

1582 ) 

1583 ), 

1584 expectedFlats, 

1585 ) 

1586 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1587 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1588 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1589 self.assertCountEqual( 

1590 list( 

1591 subsetDataIds.findDatasets( 

1592 bias, collections=["imported_r", "imported_g"], findFirst=False 

1593 ) 

1594 ), 

1595 expectedAllBiases, 

1596 ) 

1597 self.assertCountEqual( 

1598 list( 

1599 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1600 ), 

1601 expectedDeduplicatedBiases, 

1602 ) 

1603 # Materialize just the bias dataset queries. 

1604 with subsetDataIds.findDatasets( 

1605 bias, collections=["imported_r", "imported_g"], findFirst=False 

1606 ).materialize() as biases: 

1607 self.assertCountEqual(list(biases), expectedAllBiases) 

1608 with subsetDataIds.findDatasets( 

1609 bias, collections=["imported_r", "imported_g"], findFirst=True 

1610 ).materialize() as biases: 

1611 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1612 # Materialize the subset data ID query, but not the dataset 

1613 # queries. 

1614 with subsetDataIds.materialize() as subsetDataIds: 

1615 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1616 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1617 self.assertCountEqual( 

1618 list( 

1619 subsetDataIds.findDatasets( 

1620 bias, collections=["imported_r", "imported_g"], findFirst=False 

1621 ) 

1622 ), 

1623 expectedAllBiases, 

1624 ) 

1625 self.assertCountEqual( 

1626 list( 

1627 subsetDataIds.findDatasets( 

1628 bias, collections=["imported_r", "imported_g"], findFirst=True 

1629 ) 

1630 ), 

1631 expectedDeduplicatedBiases, 

1632 ) 

1633 # Materialize the bias dataset queries, too, so now we're 

1634 # materializing every single step. 

1635 with subsetDataIds.findDatasets( 

1636 bias, collections=["imported_r", "imported_g"], findFirst=False 

1637 ).materialize() as biases: 

1638 self.assertCountEqual(list(biases), expectedAllBiases) 

1639 with subsetDataIds.findDatasets( 

1640 bias, collections=["imported_r", "imported_g"], findFirst=True 

1641 ).materialize() as biases: 

1642 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1643 

1644 def testStorageClassPropagation(self): 

1645 """Test that queries for datasets respect the storage class passed in 

1646 as part of a full dataset type. 

1647 """ 

1648 registry = self.makeRegistry() 

1649 self.loadData(registry, "base.yaml") 

1650 dataset_type_in_registry = DatasetType( 

1651 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions 

1652 ) 

1653 registry.registerDatasetType(dataset_type_in_registry) 

1654 run = "run1" 

1655 registry.registerRun(run) 

1656 (inserted_ref,) = registry.insertDatasets( 

1657 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run 

1658 ) 

1659 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry) 

1660 query_dataset_type = DatasetType( 

1661 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions 

1662 ) 

1663 self.assertNotEqual(dataset_type_in_registry, query_dataset_type) 

1664 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run]) 

1665 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore 

1666 (query_datasets_ref,) = query_datasets_result 

1667 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type) 

1668 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets( 

1669 query_dataset_type, collections=[run] 

1670 ) 

1671 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type) 

1672 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result 

1673 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type) 

1674 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type) 

1675 self.assertEqual(list(query_dataset_types_result), [query_dataset_type]) 

1676 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run]) 

1677 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type) 

1678 

1679 def testEmptyDimensionsQueries(self): 

1680 """Test Query and QueryResults objects in the case where there are no 

1681 dimensions. 

1682 """ 

1683 # Set up test data: one dataset type, two runs, one dataset in each. 

1684 registry = self.makeRegistry() 

1685 self.loadData(registry, "base.yaml") 

1686 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1687 registry.registerDatasetType(schema) 

1688 dataId = DataCoordinate.makeEmpty(registry.dimensions) 

1689 run1 = "run1" 

1690 run2 = "run2" 

1691 registry.registerRun(run1) 

1692 registry.registerRun(run2) 

1693 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1694 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1695 # Query directly for both of the datasets, and each one, one at a time. 

1696 self.checkQueryResults( 

1697 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1698 ) 

1699 self.checkQueryResults( 

1700 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1701 [dataset1], 

1702 ) 

1703 self.checkQueryResults( 

1704 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1705 [dataset2], 

1706 ) 

1707 # Query for data IDs with no dimensions. 

1708 dataIds = registry.queryDataIds([]) 

1709 self.checkQueryResults(dataIds, [dataId]) 

1710 # Use queried data IDs to find the datasets. 

1711 self.checkQueryResults( 

1712 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1713 [dataset1, dataset2], 

1714 ) 

1715 self.checkQueryResults( 

1716 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1717 [dataset1], 

1718 ) 

1719 self.checkQueryResults( 

1720 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1721 [dataset2], 

1722 ) 

1723 # Now materialize the data ID query results and repeat those tests. 

1724 with dataIds.materialize() as dataIds: 

1725 self.checkQueryResults(dataIds, [dataId]) 

1726 self.checkQueryResults( 

1727 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1728 [dataset1], 

1729 ) 

1730 self.checkQueryResults( 

1731 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1732 [dataset2], 

1733 ) 

1734 # Query for non-empty data IDs, then subset that to get the empty one. 

1735 # Repeat the above tests starting from that. 

1736 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1737 self.checkQueryResults(dataIds, [dataId]) 

1738 self.checkQueryResults( 

1739 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1740 [dataset1, dataset2], 

1741 ) 

1742 self.checkQueryResults( 

1743 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1744 [dataset1], 

1745 ) 

1746 self.checkQueryResults( 

1747 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1748 [dataset2], 

1749 ) 

1750 with dataIds.materialize() as dataIds: 

1751 self.checkQueryResults(dataIds, [dataId]) 

1752 self.checkQueryResults( 

1753 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1754 [dataset1, dataset2], 

1755 ) 

1756 self.checkQueryResults( 

1757 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1758 [dataset1], 

1759 ) 

1760 self.checkQueryResults( 

1761 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1762 [dataset2], 

1763 ) 

1764 # Query for non-empty data IDs, then materialize, then subset to get 

1765 # the empty one. Repeat again. 

1766 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1767 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1768 self.checkQueryResults(dataIds, [dataId]) 

1769 self.checkQueryResults( 

1770 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1771 [dataset1, dataset2], 

1772 ) 

1773 self.checkQueryResults( 

1774 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1775 [dataset1], 

1776 ) 

1777 self.checkQueryResults( 

1778 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1779 [dataset2], 

1780 ) 

1781 with dataIds.materialize() as dataIds: 

1782 self.checkQueryResults(dataIds, [dataId]) 

1783 self.checkQueryResults( 

1784 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1785 [dataset1, dataset2], 

1786 ) 

1787 self.checkQueryResults( 

1788 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1789 [dataset1], 

1790 ) 

1791 self.checkQueryResults( 

1792 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1793 [dataset2], 

1794 ) 

1795 # Query for non-empty data IDs with a constraint on an empty-data-ID 

1796 # dataset that exists. 

1797 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...) 

1798 self.checkQueryResults( 

1799 dataIds.subset(unique=True), 

1800 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)], 

1801 ) 

1802 # Again query for non-empty data IDs with a constraint on empty-data-ID 

1803 # datasets, but when the datasets don't exist. We delete the existing 

1804 # dataset and query just that collection rather than creating a new 

1805 # empty collection because this is a bit less likely for our build-time 

1806 # logic to shortcut-out (via the collection summaries), and such a 

1807 # shortcut would make this test a bit more trivial than we'd like. 

1808 registry.removeDatasets([dataset2]) 

1809 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2) 

1810 self.checkQueryResults(dataIds, []) 

1811 

1812 def testDimensionDataModifications(self): 

1813 """Test that modifying dimension records via: 

1814 syncDimensionData(..., update=True) and 

1815 insertDimensionData(..., replace=True) works as expected, even in the 

1816 presence of datasets using those dimensions and spatial overlap 

1817 relationships. 

1818 """ 

1819 

1820 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1821 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1822 for begin, end in ranges: 

1823 yield from range(begin, end) 

1824 

1825 def range_set_hull( 

1826 ranges: lsst.sphgeom.RangeSet, 

1827 pixelization: lsst.sphgeom.HtmPixelization, 

1828 ) -> lsst.sphgeom.ConvexPolygon: 

1829 """Create a ConvexPolygon hull of the region defined by a set of 

1830 HTM pixelization index ranges. 

1831 """ 

1832 points = [] 

1833 for index in unpack_range_set(ranges): 

1834 points.extend(pixelization.triangle(index).getVertices()) 

1835 return lsst.sphgeom.ConvexPolygon(points) 

1836 
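# A sketch of the HTM index arithmetic the helpers above rely on: each
# trixel subdivides into four at the next level, so the children of
# trixel ``i`` are 4*i .. 4*i + 3. This is why the
# ``RangeSet(index).scaled(4)`` call below yields exactly the four child
# trixels of ``index``.
def _htm_children(index: int) -> list[int]:
    return [4 * index + k for k in range(4)]

assert _htm_children(12288) == [49152, 49153, 49154, 49155]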

1837 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1838 # and four child regions (the trixels within the parent at the next 

1839 # level). We'll use the parent as a tract/visit region and the children

1840 # as its patch/visit_detector regions. 

1841 registry = self.makeRegistry() 

1842 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1843 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1844 index = 12288 

1845 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 

1846 assert htm6.universe().contains(child_ranges_small) 

1847 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)] 

1848 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1849 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1850 ) 

1851 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1852 # Make a larger version of each child region, defined to be the set of 

1853 # htm6 trixels that overlap the original's bounding circle. Make a new 

1854 # parent that's the convex hull of the new children. 

1855 child_regions_large = [ 

1856 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1857 ] 

1858 assert all( 

1859 large.contains(small) 

1860 for large, small in zip(child_regions_large, child_regions_small, strict=True) 

1861 ) 

1862 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1863 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1864 ) 

1865 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1866 assert parent_region_large.contains(parent_region_small) 

1867 assert not parent_region_small.contains(parent_region_large) 

1868 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1869 # Find some commonSkyPix indices that overlap the large regions but

1870 # not the small regions. We use commonSkyPix here to make sure the

1871 # real tests later involve what's in the database, not just post-query 

1872 # filtering of regions. 

1873 child_difference_indices = [] 

1874 for large, small in zip(child_regions_large, child_regions_small, strict=True): 

1875 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1876 assert difference, "if this is empty, we can't test anything useful with these regions" 

1877 assert all( 

1878 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1879 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1880 for d in difference 

1881 ) 

1882 child_difference_indices.append(difference) 

1883 parent_difference_indices = list( 

1884 unpack_range_set( 

1885 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1886 ) 

1887 ) 

1888 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1889 assert all( 

1890 ( 

1891 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1892 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1893 ) 

1894 for d in parent_difference_indices 

1895 ) 

1896 # Now that we've finally got those regions, we'll insert the large ones 

1897 # as tract/patch dimension records. 

1898 skymap_name = "testing_v1" 

1899 registry.insertDimensionData( 

1900 "skymap", 

1901 { 

1902 "name": skymap_name, 

1903 "hash": bytes([42]), 

1904 "tract_max": 1, 

1905 "patch_nx_max": 2, 

1906 "patch_ny_max": 2, 

1907 }, 

1908 ) 

1909 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

1910 registry.insertDimensionData( 

1911 "patch", 

1912 *[ 

1913 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1914 for n, c in enumerate(child_regions_large) 

1915 ], 

1916 ) 

1917 # Add a dataset that uses these dimensions to make sure that modifying

1918 # them doesn't disrupt foreign keys (need to make sure DB doesn't 

1919 # implement insert with replace=True as delete-then-insert). 

1920 dataset_type = DatasetType( 

1921 "coadd", 

1922 dimensions=["tract", "patch"], 

1923 universe=registry.dimensions, 

1924 storageClass="Exposure", 

1925 ) 

1926 registry.registerDatasetType(dataset_type) 

1927 registry.registerCollection("the_run", CollectionType.RUN) 

1928 registry.insertDatasets( 

1929 dataset_type, 

1930 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

1931 run="the_run", 

1932 ) 

1933 # Query for tracts and patches that overlap some "difference"

1934 # commonSkyPix pixels; there should be overlaps, because the database

1935 # has the "large" suite of regions.

1936 self.assertEqual( 

1937 {0}, 

1938 { 

1939 data_id["tract"] 

1940 for data_id in registry.queryDataIds( 

1941 ["tract"], 

1942 skymap=skymap_name, 

1943 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1944 ) 

1945 }, 

1946 ) 

1947 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1948 self.assertIn( 

1949 patch_id, 

1950 { 

1951 data_id["patch"] 

1952 for data_id in registry.queryDataIds( 

1953 ["patch"], 

1954 skymap=skymap_name, 

1955 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1956 ) 

1957 }, 

1958 ) 

1959 # Use sync to update the tract region and insert to update the regions 

1960 # of the patches, to the "small" suite. 

1961 updated = registry.syncDimensionData( 

1962 "tract", 

1963 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

1964 update=True, 

1965 ) 

1966 self.assertEqual(updated, {"region": parent_region_large}) 

1967 registry.insertDimensionData( 

1968 "patch", 

1969 *[ 

1970 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1971 for n, c in enumerate(child_regions_small) 

1972 ], 

1973 replace=True, 

1974 ) 

1975 # Query again; there now should be no such overlaps, because the 

1976 # database has the "small" suite of regions. 

1977 self.assertFalse( 

1978 set( 

1979 registry.queryDataIds( 

1980 ["tract"], 

1981 skymap=skymap_name, 

1982 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1983 ) 

1984 ) 

1985 ) 

1986 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1987 self.assertNotIn( 

1988 patch_id, 

1989 { 

1990 data_id["patch"] 

1991 for data_id in registry.queryDataIds( 

1992 ["patch"], 

1993 skymap=skymap_name, 

1994 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1995 ) 

1996 }, 

1997 ) 

1998 # Update back to the large regions and query one more time. 

1999 updated = registry.syncDimensionData( 

2000 "tract", 

2001 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

2002 update=True, 

2003 ) 

2004 self.assertEqual(updated, {"region": parent_region_small}) 

2005 registry.insertDimensionData( 

2006 "patch", 

2007 *[ 

2008 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

2009 for n, c in enumerate(child_regions_large) 

2010 ], 

2011 replace=True, 

2012 ) 

2013 self.assertEqual( 

2014 {0}, 

2015 { 

2016 data_id["tract"] 

2017 for data_id in registry.queryDataIds( 

2018 ["tract"], 

2019 skymap=skymap_name, 

2020 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

2021 ) 

2022 }, 

2023 ) 

2024 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

2025 self.assertIn( 

2026 patch_id, 

2027 { 

2028 data_id["patch"] 

2029 for data_id in registry.queryDataIds( 

2030 ["patch"], 

2031 skymap=skymap_name, 

2032 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

2033 ) 

2034 }, 

2035 ) 

2036 

2037 def testCalibrationCollections(self): 

2038 """Test operations on `~CollectionType.CALIBRATION` collections, 

2039 including `Registry.certify`, `Registry.decertify`, 

2040 `Registry.findDataset`, and 

2041 `DataCoordinateQueryResults.findRelatedDatasets`. 

2042 """ 

2043 # Setup - make a Registry, fill it with some datasets in 

2044 # non-calibration collections. 

2045 registry = self.makeRegistry() 

2046 self.loadData(registry, "base.yaml") 

2047 self.loadData(registry, "datasets.yaml") 

2048 # Set up some timestamps. 

2049 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

2050 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

2051 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

2052 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

2053 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

2054 allTimespans = [ 

2055 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

2056 ] 
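# The doubled ``None`` endpoints above are deliberate: pairing every two
# entries of [None, t1, ..., t5, None] yields all bounded spans (ti, tj),
# all half-unbounded spans (None, tj) and (ti, None), plus the fully
# unbounded (None, None). The same idea with integers standing in for
# the timestamps:
_endpoints = [None, 1, 2, None]
assert list(itertools.combinations(_endpoints, r=2)) == [
    (None, 1), (None, 2), (None, None), (1, 2), (1, None), (2, None)
]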

2057 # Insert some exposure records with timespans between each sequential 

2058 # pair of those. 

2059 registry.insertDimensionData( 

2060 "exposure", 

2061 { 

2062 "instrument": "Cam1", 

2063 "id": 0, 

2064 "obs_id": "zero", 

2065 "physical_filter": "Cam1-G", 

2066 "timespan": Timespan(t1, t2), 

2067 }, 

2068 { 

2069 "instrument": "Cam1", 

2070 "id": 1, 

2071 "obs_id": "one", 

2072 "physical_filter": "Cam1-G", 

2073 "timespan": Timespan(t2, t3), 

2074 }, 

2075 { 

2076 "instrument": "Cam1", 

2077 "id": 2, 

2078 "obs_id": "two", 

2079 "physical_filter": "Cam1-G", 

2080 "timespan": Timespan(t3, t4), 

2081 }, 

2082 { 

2083 "instrument": "Cam1", 

2084 "id": 3, 

2085 "obs_id": "three", 

2086 "physical_filter": "Cam1-G", 

2087 "timespan": Timespan(t4, t5), 

2088 }, 

2089 ) 

2090 # Get references to some datasets. 

2091 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

2092 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

2093 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

2094 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

2095 # Register the main calibration collection we'll be working with. 

2096 collection = "Cam1/calibs/default" 

2097 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

2098 # Cannot associate into a calibration collection (no timespan). 

2099 with self.assertRaises(CollectionTypeError): 

2100 registry.associate(collection, [bias2a]) 

2101 # Certify 2a dataset with [t2, t4) validity. 

2102 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

2103 # Test that we can query for this dataset via the new collection, both 

2104 # on its own and with a RUN collection. 

2105 self.assertEqual( 

2106 set(registry.queryDatasets("bias", findFirst=False, collections=collection)), 

2107 {bias2a}, 

2108 ) 

2109 self.assertEqual( 

2110 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])), 

2111 { 

2112 bias2a, 

2113 bias2b, 

2114 bias3b, 

2115 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2116 }, 

2117 ) 

2118 self.assertEqual( 

2119 set(registry.queryDataIds("detector", datasets="bias", collections=collection)), 

2120 {registry.expandDataId(instrument="Cam1", detector=2)}, 

2121 ) 

2122 self.assertEqual( 

2123 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])), 

2124 { 

2125 registry.expandDataId(instrument="Cam1", detector=2), 

2126 registry.expandDataId(instrument="Cam1", detector=3), 

2127 registry.expandDataId(instrument="Cam1", detector=4), 

2128 }, 

2129 ) 

2130 self.assertEqual( 

2131 set( 

2132 registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets( 

2133 "bias", findFirst=True, collections=[collection] 

2134 ) 

2135 ), 

2136 { 

2137 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a), 

2138 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a), 

2139 }, 

2140 ) 

2141 self.assertEqual( 

2142 set( 

2143 registry.queryDataIds( 

2144 ["exposure", "detector"], instrument="Cam1", detector=2 

2145 ).findRelatedDatasets("bias", findFirst=True, collections=[collection, "imported_r"]) 

2146 ), 

2147 { 

2148 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a), 

2149 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a), 

2150 (registry.expandDataId(instrument="Cam1", detector=2, exposure=0), bias2b), 

2151 (registry.expandDataId(instrument="Cam1", detector=2, exposure=3), bias2b), 

2152 }, 

2153 ) 

2154 

2155 # We should not be able to certify 2b with anything overlapping that 

2156 # window. 

2157 with self.assertRaises(ConflictingDefinitionError): 

2158 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

2159 with self.assertRaises(ConflictingDefinitionError): 

2160 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

2161 with self.assertRaises(ConflictingDefinitionError): 

2162 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

2163 with self.assertRaises(ConflictingDefinitionError): 

2164 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

2165 with self.assertRaises(ConflictingDefinitionError): 

2166 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

2167 with self.assertRaises(ConflictingDefinitionError): 

2168 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

2169 with self.assertRaises(ConflictingDefinitionError): 

2170 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

2171 with self.assertRaises(ConflictingDefinitionError): 

2172 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 
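# A minimal model of the half-open overlap rule driving the conflict
# checks above (None meaning unbounded); this is a sketch, not the
# actual Timespan implementation. With integers t1..t5 standing in for
# the timestamps, bias2a's validity [t2, t4) is (2, 4):
def _overlaps(a_begin, a_end, b_begin, b_end):
    a_begins_before_b_ends = b_end is None or a_begin is None or a_begin < b_end
    b_begins_before_a_ends = a_end is None or b_begin is None or b_begin < a_end
    return a_begins_before_b_ends and b_begins_before_a_ends

assert _overlaps(None, 3, 2, 4)      # (None, t3) conflicts with [t2, t4)
assert _overlaps(1, 5, 2, 4)         # (t1, t5) conflicts as well
assert not _overlaps(None, 2, 2, 4)  # (None, t2) only touches, no conflict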

2173 # We should be able to certify 3a with a range overlapping that window, 

2174 # because it's for a different detector. 

2175 # We'll certify 3a over [t1, t3). 

2176 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

2177 # Now we'll certify 2b and 3b together over [t4, ∞). 

2178 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

2179 

2180 # Fetch all associations and check that they are what we expect. 

2181 self.assertCountEqual( 

2182 list( 

2183 registry.queryDatasetAssociations( 

2184 "bias", 

2185 collections=[collection, "imported_g", "imported_r"], 

2186 ) 

2187 ), 

2188 [ 

2189 DatasetAssociation( 

2190 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

2191 collection="imported_g", 

2192 timespan=None, 

2193 ), 

2194 DatasetAssociation( 

2195 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2196 collection="imported_r", 

2197 timespan=None, 

2198 ), 

2199 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

2200 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

2201 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

2202 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

2203 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

2204 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

2205 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2206 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2207 ], 

2208 ) 

2209 

2210 class Ambiguous: 

2211 """Tag class to denote lookups that should be ambiguous.""" 

2212 

2213 pass 

2214 

2215 def assertLookup( 

2216 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None 

2217 ) -> None: 

2218 """Local function that asserts that a bias lookup returns the given 

2219 expected result. 

2220 """ 

2221 if expected is Ambiguous: 

2222 with self.assertRaises((DatasetTypeError, LookupError)): 

2223 registry.findDataset( 

2224 "bias", 

2225 collections=collection, 

2226 instrument="Cam1", 

2227 detector=detector, 

2228 timespan=timespan, 

2229 ) 

2230 else: 

2231 self.assertEqual( 

2232 expected, 

2233 registry.findDataset( 

2234 "bias", 

2235 collections=collection, 

2236 instrument="Cam1", 

2237 detector=detector, 

2238 timespan=timespan, 

2239 ), 

2240 ) 

2241 

2242 # Systematically test lookups against expected results. 

2243 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2244 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2245 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2246 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2247 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

2248 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2249 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2250 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2251 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2252 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

2253 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2254 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2255 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2256 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

2257 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2258 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

2259 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

2260 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

2261 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

2262 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2263 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2264 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2265 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2266 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2267 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2268 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

2269 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2270 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2271 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2272 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2273 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

2274 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2275 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2276 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2277 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2278 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2279 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2280 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2281 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2282 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2283 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2284 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2285 

2286 # Test lookups via temporal joins to exposures. 

2287 self.assertEqual( 

2288 set( 

2289 registry.queryDataIds( 

2290 ["exposure", "detector"], instrument="Cam1", detector=2 

2291 ).findRelatedDatasets("bias", collections=[collection]) 

2292 ), 

2293 { 

2294 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a), 

2295 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a), 

2296 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b), 

2297 }, 

2298 ) 

2299 self.assertEqual( 

2300 set( 

2301 registry.queryDataIds( 

2302 ["exposure", "detector"], instrument="Cam1", detector=3 

2303 ).findRelatedDatasets("bias", collections=[collection]) 

2304 ), 

2305 { 

2306 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a), 

2307 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a), 

2308 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b), 

2309 }, 

2310 ) 

2311 self.assertEqual( 

2312 set( 

2313 registry.queryDataIds( 

2314 ["exposure", "detector"], instrument="Cam1", detector=2 

2315 ).findRelatedDatasets("bias", collections=[collection, "imported_g"]) 

2316 ), 

2317 { 

2318 (registry.expandDataId(instrument="Cam1", exposure=0, detector=2), bias2a), 

2319 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a), 

2320 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a), 

2321 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b), 

2322 }, 

2323 ) 

2324 self.assertEqual( 

2325 set( 

2326 registry.queryDataIds( 

2327 ["exposure", "detector"], instrument="Cam1", detector=3 

2328 ).findRelatedDatasets("bias", collections=[collection, "imported_g"]) 

2329 ), 

2330 { 

2331 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a), 

2332 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a), 

2333 (registry.expandDataId(instrument="Cam1", exposure=2, detector=3), bias3a), 

2334 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b), 

2335 }, 

2336 ) 

2337 
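# A compact model of the temporal joins above, with integers standing in
# for the timestamps: each exposure resolves to the calibration whose
# validity range overlaps the exposure's timespan. For detector 2,
# bias2a is valid over [t2, t4) and bias2b over [t4, None):
_certified = {"bias2a": (2, 4), "bias2b": (4, None)}
_exposures = {0: (1, 2), 1: (2, 3), 2: (3, 4), 3: (4, 5)}
_matches = {
    exp: [
        name
        for name, (vb, ve) in _certified.items()
        if (ve is None or begin < ve) and vb < end
    ]
    for exp, (begin, end) in _exposures.items()
}
assert _matches == {0: [], 1: ["bias2a"], 2: ["bias2a"], 3: ["bias2b"]}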

2338 # Decertify [t3, t5) for all data IDs, and run the test lookups again.

2339 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2340 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2341 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2342 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2343 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2344 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2345 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2346 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2347 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2348 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2349 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2350 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2351 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2352 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2353 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2354 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2355 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2356 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2357 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2358 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2359 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2360 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2361 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2362 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2363 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2364 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2365 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2366 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2367 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2368 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2369 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2370 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2371 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2372 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2373 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2374 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2375 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2376 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2377 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2378 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2379 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2380 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2381 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2382 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2383 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2384 

2385 # Decertify everything, this time with explicit data IDs, then check 

2386 # that no lookups succeed. 

2387 registry.decertify( 

2388 collection, 

2389 "bias", 

2390 Timespan(None, None), 

2391 dataIds=[ 

2392 dict(instrument="Cam1", detector=2), 

2393 dict(instrument="Cam1", detector=3), 

2394 ], 

2395 ) 

2396 for detector in (2, 3): 

2397 for timespan in allTimespans: 

2398 assertLookup(detector=detector, timespan=timespan, expected=None) 

2399 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2400 # those. 

2401 registry.certify( 

2402 collection, 

2403 [bias2a, bias3a], 

2404 Timespan(None, None), 

2405 ) 

2406 for timespan in allTimespans: 

2407 assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2408 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2409 # Decertify just the detector=2 bias (bias2a) over [t2, t4).

2410 # This should split a single certification row into two (and leave the 

2411 # other existing row, for bias3a, alone). 

2412 registry.decertify( 

2413 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2414 ) 
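# A sketch of the split this decertify performs: removing [t2, t4) from
# a certification over (None, None) leaves two rows, (None, t2) and
# [t4, None). Assuming the hole overlaps the span, with integers for
# timestamps:
def _subtract(span, hole):
    pieces = []
    if hole[0] is not None and (span[0] is None or span[0] < hole[0]):
        pieces.append((span[0], hole[0]))
    if hole[1] is not None and (span[1] is None or hole[1] < span[1]):
        pieces.append((hole[1], span[1]))
    return pieces

assert _subtract((None, None), (2, 4)) == [(None, 2), (4, None)]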

2415 for timespan in allTimespans: 

2416 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2417 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2418 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2419 if overlapsBefore and overlapsAfter: 

2420 expected = Ambiguous 

2421 elif overlapsBefore or overlapsAfter: 

2422 expected = bias2a 

2423 else: 

2424 expected = None 

2425 assertLookup(detector=2, timespan=timespan, expected=expected) 

2426 

2427 def testSkipCalibs(self): 

2428 """Test how queries handle skipping of calibration collections.""" 

2429 registry = self.makeRegistry() 

2430 self.loadData(registry, "base.yaml") 

2431 self.loadData(registry, "datasets.yaml") 

2432 

2433 coll_calib = "Cam1/calibs/default" 

2434 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2435 

2436 # Add all biases to the calibration collection. 

2437 # Without this, the logic that prunes dataset subqueries based on 

2438 # datasetType-collection summary information will fire before the logic 

2439 # we want to test below. Everywhere but here, that is a good thing

2440 # (it avoids the dreaded NotImplementedError a bit more often).

2441 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2442 

2443 coll_list = [coll_calib, "imported_g", "imported_r"] 

2444 chain = "Cam1/chain" 

2445 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2446 registry.setCollectionChain(chain, coll_list) 

2447 

2448 # An explicit collection list raises NotImplementedError if

2449 # findFirst=True or the query involves temporal dimensions.

2450 with self.assertRaises(NotImplementedError): 

2451 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2452 with self.assertRaises(NotImplementedError): 

2453 registry.queryDataIds( 

2454 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2455 ).count() 

2456 

2457 # chain will skip 

2458 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2459 self.assertGreater(len(datasets), 0) 

2460 

2461 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2462 self.assertGreater(len(dataIds), 0) 

2463 

2464 # glob will skip too 

2465 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2466 self.assertGreater(len(datasets), 0) 

2467 

2468 # regular expression will skip too 

2469 pattern = re.compile(".*") 

2470 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2471 self.assertGreater(len(datasets), 0) 

2472 

2473 # ellipsis should work as usual 

2474 datasets = list(registry.queryDatasets("bias", collections=...)) 

2475 self.assertGreater(len(datasets), 0) 

2476 

2477 # few tests with findFirst 

2478 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2479 self.assertGreater(len(datasets), 0) 

2480 
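
# --- Editor's sketch (not part of the original test module) ---------------
# The chain construction above in one place, as a hedged sketch: CHAINED
# collections may mix CALIBRATION and RUN members, and a plain query
# (findFirst=False, no temporal dimensions) skips the calibration member
# instead of raising NotImplementedError. The helper name is hypothetical.
from lsst.daf.butler import CollectionType

def build_mixed_chain(registry, chain_name, calib_collection, run_collections):
    registry.registerCollection(chain_name, type=CollectionType.CHAINED)
    registry.setCollectionChain(chain_name, [calib_collection, *run_collections])
    return list(registry.queryDatasets("bias", collections=chain_name))
# ---------------------------------------------------------------------------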

2481 def testIngestTimeQuery(self): 

2482 registry = self.makeRegistry() 

2483 self.loadData(registry, "base.yaml") 

2484 dt0 = datetime.utcnow() 

2485 self.loadData(registry, "datasets.yaml") 

2486 dt1 = datetime.utcnow() 

2487 

2488 datasets = list(registry.queryDatasets(..., collections=...)) 

2489 len0 = len(datasets) 

2490 self.assertGreater(len0, 0) 

2491 

2492 where = "ingest_date > T'2000-01-01'" 

2493 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2494 len1 = len(datasets) 

2495 self.assertEqual(len0, len1) 

2496 

2497 # no one will ever use this piece of software in 30 years 

2498 where = "ingest_date > T'2050-01-01'" 

2499 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2500 len2 = len(datasets) 

2501 self.assertEqual(len2, 0) 

2502 

2503 # Check more exact timing to make sure there is no 37 seconds offset 

2504 # (after fixing DM-30124). SQLite time precision is 1 second, so make 

2505 # sure that we don't test with higher precision. 

2506 tests = [ 

2507 # format: (timestamp, operator, expected_len) 

2508 (dt0 - timedelta(seconds=1), ">", len0), 

2509 (dt0 - timedelta(seconds=1), "<", 0), 

2510 (dt1 + timedelta(seconds=1), "<", len0), 

2511 (dt1 + timedelta(seconds=1), ">", 0), 

2512 ] 

2513 for dt, op, expect_len in tests: 

2514 dt_str = dt.isoformat(sep=" ") 

2515 

2516 where = f"ingest_date {op} T'{dt_str}'" 

2517 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2518 self.assertEqual(len(datasets), expect_len) 

2519 

2520 # same with bind using datetime or astropy Time 

2521 where = f"ingest_date {op} ingest_time" 

2522 datasets = list( 

2523 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2524 ) 

2525 self.assertEqual(len(datasets), expect_len) 

2526 

2527 dt_astropy = astropy.time.Time(dt, format="datetime") 

2528 datasets = list( 

2529 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2530 ) 

2531 self.assertEqual(len(datasets), expect_len) 

2532 
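
# --- Editor's sketch (not part of the original test module) ---------------
# The ingest_date idiom from the test above, as a reusable sketch; the
# helper name is hypothetical. `when` may be a naive UTC datetime or an
# astropy.time.Time, since both are accepted as bind values.
def datasets_ingested_after(registry, when):
    # Binding the timestamp avoids formatting it into the expression text.
    return list(
        registry.queryDatasets(..., collections=..., where="ingest_date > t0", bind={"t0": when})
    )
# ---------------------------------------------------------------------------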

2533 def testTimespanQueries(self): 

2534 """Test query expressions involving timespans.""" 

2535 registry = self.makeRegistry() 

2536 self.loadData(registry, "hsc-rc2-subset.yaml") 

2537 # All visits in the database; mapping from ID to timespan. 

2538 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2539 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2540 # exposure IDs are monotonically increasing). 

2541 ids = sorted(visits.keys()) 

2542 self.assertGreater(len(ids), 20) 

2543 # Pick some quasi-random indexes into `ids` to play with. 

2544 i1 = int(len(ids) * 0.1) 

2545 i2 = int(len(ids) * 0.3) 

2546 i3 = int(len(ids) * 0.6) 

2547 i4 = int(len(ids) * 0.8) 

2548 # Extract some times from those: just before the beginning of i1 (which 

2549 # should be after the end of the previous visit), exactly the 

2550 # beginning of i2, just after the beginning of i3 (and before its end), 

2551 # and the exact end of i4. 

2552 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2553 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2554 t2 = visits[ids[i2]].begin 

2555 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2556 self.assertLess(t3, visits[ids[i3]].end) 

2557 t4 = visits[ids[i4]].end 

2558 # Make sure those are actually in order. 

2559 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2560 

2561 bind = { 

2562 "t1": t1, 

2563 "t2": t2, 

2564 "t3": t3, 

2565 "t4": t4, 

2566 "ts23": Timespan(t2, t3), 

2567 } 

2568 

2569 def query(where): 

2570 """Return results as a sorted, deduplicated list of visit IDs.""" 

2571 return sorted( 

2572 { 

2573 dataId["visit"] 

2574 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2575 } 

2576 ) 

2577 

2578 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2579 # where they appear in the expression, and how we get the timespan into 

2580 # the expression. 

2581 

2582 # t1 is before the start of i1, so this should not include i1. 

2583 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2584 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2585 # should not include i2. 

2586 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2587 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2588 # t3 is in the middle of i3, so this should include i3. 

2589 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2590 # This one should not include i3 by the same reasoning. 

2591 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2592 # t4 is exactly at the end of i4, so this should include i4. 

2593 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2594 # i4's upper bound of t4 is exclusive, so this should not include i4. 

2595 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2596 

2597 # Now some timespan vs. time scalar queries. 

2598 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2599 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2600 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2601 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2602 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2603 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2604 

2605 # Empty timespans should not overlap anything. 

2606 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 

2607 
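
# --- Editor's sketch (not part of the original test module) ---------------
# A sketch of the timespan-overlap idiom tested above, assuming `registry`
# holds visit records for `instrument`. Timespans are half-open [begin,
# end): a None bound means unbounded, and an empty span overlaps nothing.
from lsst.daf.butler import Timespan

def visits_overlapping(registry, instrument, begin, end):
    data_ids = registry.queryDataIds(
        "visit", instrument=instrument, where="visit.timespan OVERLAPS ts",
        bind={"ts": Timespan(begin, end)},
    )
    # Deduplicate and sort, as the query() helper above does.
    return sorted({data_id["visit"] for data_id in data_ids})
# ---------------------------------------------------------------------------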

2608 def testCollectionSummaries(self): 

2609 """Test recording and retrieval of collection summaries.""" 

2610 self.maxDiff = None 

2611 registry = self.makeRegistry() 

2612 # Importing datasets from yaml should go through the code path where 

2613 # we update collection summaries as we insert datasets. 

2614 self.loadData(registry, "base.yaml") 

2615 self.loadData(registry, "datasets.yaml") 

2616 flat = registry.getDatasetType("flat") 

2617 expected1 = CollectionSummary() 

2618 expected1.dataset_types.add(registry.getDatasetType("bias")) 

2619 expected1.add_data_ids( 

2620 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)] 

2621 ) 

2622 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2623 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2624 # Create a chained collection with both of the imported runs; the 

2625 # summary should be the same, because it's a union with itself. 

2626 chain = "chain" 

2627 registry.registerCollection(chain, CollectionType.CHAINED) 

2628 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2629 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2630 # Associate flats only into a tagged collection and a calibration 

2631 # collection to check summaries of those. 

2632 tag = "tag" 

2633 registry.registerCollection(tag, CollectionType.TAGGED) 

2634 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2635 calibs = "calibs" 

2636 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2637 registry.certify( 

2638 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2639 ) 

2640 expected2 = expected1.copy() 

2641 expected2.dataset_types.discard("bias") 

2642 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2643 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2644 # Explicitly calling Registry.refresh() should load those same 

2645 # summaries, via a totally different code path. 

2646 registry.refresh() 

2647 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2648 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2649 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2650 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2651 
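
# --- Editor's sketch (not part of the original test module) ---------------
# How a client might consume the summaries tested above. This assumes
# CollectionSummary.dataset_types is a named set exposing a `.names` view,
# as the add()/discard() calls above suggest; the helper is hypothetical.
def may_contain(registry, collection, dataset_type_name):
    summary = registry.getCollectionSummary(collection)
    # Summaries are conservative: a miss proves the collection holds no
    # such datasets, while a hit only means it may hold some.
    return dataset_type_name in summary.dataset_types.names
# ---------------------------------------------------------------------------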

2652 def testBindInQueryDatasets(self): 

2653 """Test that the bind parameter is correctly forwarded in 

2654 queryDatasets recursion. 

2655 """ 

2656 registry = self.makeRegistry() 

2657 # Load some dimension records and datasets so the bind-based query 

2658 # below has something to match. 

2659 self.loadData(registry, "base.yaml") 

2660 self.loadData(registry, "datasets.yaml") 

2661 self.assertEqual( 

2662 set(registry.queryDatasets("flat", band="r", collections=...)), 

2663 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2664 ) 

2665 
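
# --- Editor's sketch (not part of the original test module) ---------------
# The bind idiom from the assertion above, written out as a hypothetical
# helper: the value travels out-of-band in the bind mapping, so the
# expression text never needs quoting or escaping.
def find_flats_for_band(registry, band):
    return set(
        registry.queryDatasets(
            "flat", where="band = my_band", bind={"my_band": band}, collections=...
        )
    )
# ---------------------------------------------------------------------------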

2666 def testQueryIntRangeExpressions(self): 

2667 """Test integer range expressions in ``where`` arguments. 

2668 

2669 Note that our expressions use inclusive stop values, unlike Python's. 

2670 """ 

2671 registry = self.makeRegistry() 

2672 self.loadData(registry, "base.yaml") 

2673 self.assertEqual( 

2674 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")), 

2675 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]}, 

2676 ) 

2677 self.assertEqual( 

2678 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")), 

2679 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]}, 

2680 ) 

2681 self.assertEqual( 

2682 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")), 

2683 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]}, 

2684 ) 

2685 
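
# --- Editor's sketch (not part of the original test module) ---------------
# The range-expression syntax exercised above, as a hypothetical helper.
# Unlike Python's range(), the stop value is inclusive: (1..4:2) selects
# detectors 1 and 3. Integer interpolation avoids any quoting concerns.
def detectors_in_range(registry, instrument, start, stop, stride=1):
    where = f"detector IN ({start}..{stop}:{stride})"
    return {
        data_id["detector"]
        for data_id in registry.queryDataIds(["detector"], instrument=instrument, where=where)
    }
# ---------------------------------------------------------------------------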

2686 def testQueryResultSummaries(self): 

2687 """Test summary methods like `count`, `any`, and `explain_no_results` 

2688 on `DataCoordinateQueryResults` and `DatasetQueryResults`. 

2689 """ 

2690 registry = self.makeRegistry() 

2691 self.loadData(registry, "base.yaml") 

2692 self.loadData(registry, "datasets.yaml") 

2693 self.loadData(registry, "spatial.yaml") 

2694 # Default test dataset has two collections, each with both flats and 

2695 # biases. Add a new collection with only biases. 

2696 registry.registerCollection("biases", CollectionType.TAGGED) 

2697 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2698 # First query yields two results, and involves no postprocessing. 

2699 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2700 self.assertTrue(query1.any(execute=False, exact=False)) 

2701 self.assertTrue(query1.any(execute=True, exact=False)) 

2702 self.assertTrue(query1.any(execute=True, exact=True)) 

2703 self.assertEqual(query1.count(exact=False), 2) 

2704 self.assertEqual(query1.count(exact=True), 2) 

2705 self.assertFalse(list(query1.explain_no_results())) 

2706 # Second query should yield no results, which we should see when 

2707 # we attempt to expand the data ID. 

2708 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2709 # There's no execute=False, exact=False test here because the behavior 

2710 # is not something we want to guarantee in this case (and exact=False 

2711 # says either answer is legal). 

2712 self.assertFalse(query2.any(execute=True, exact=False)) 

2713 self.assertFalse(query2.any(execute=True, exact=True)) 

2714 self.assertEqual(query2.count(exact=False), 0) 

2715 self.assertEqual(query2.count(exact=True), 0) 

2716 self.assertTrue(list(query2.explain_no_results())) 

2717 # These queries yield no results due to various problems that can be 

2718 # spotted prior to execution, yielding helpful diagnostics. 

2719 base_query = registry.queryDataIds(["detector", "physical_filter"]) 

2720 queries_and_snippets = [ 

2721 ( 

2722 # Dataset type name doesn't match any existing dataset types. 

2723 registry.queryDatasets("nonexistent", collections=...), 

2724 ["nonexistent"], 

2725 ), 

2726 ( 

2727 # Dataset type object isn't registered. 

2728 registry.queryDatasets( 

2729 DatasetType( 

2730 "nonexistent", 

2731 dimensions=["instrument"], 

2732 universe=registry.dimensions, 

2733 storageClass="Image", 

2734 ), 

2735 collections=..., 

2736 ), 

2737 ["nonexistent"], 

2738 ), 

2739 ( 

2740 # No datasets of this type in this collection. 

2741 registry.queryDatasets("flat", collections=["biases"]), 

2742 ["flat", "biases"], 

2743 ), 

2744 ( 

2745 # No datasets of this type in this collection. 

2746 base_query.findDatasets("flat", collections=["biases"]), 

2747 ["flat", "biases"], 

2748 ), 

2749 ( 

2750 # No collections matching at all. 

2751 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2752 ["potato"], 

2753 ), 

2754 ] 

2755 # The behavior of these additional queries is slated to change in the 

2756 # future, so we also check for deprecation warnings. 

2757 with self.assertWarns(FutureWarning): 

2758 queries_and_snippets.append( 

2759 ( 

2760 # Dataset type name doesn't match any existing dataset 

2761 # types. 

2762 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...), 

2763 ["nonexistent"], 

2764 ) 

2765 ) 

2766 with self.assertWarns(FutureWarning): 

2767 queries_and_snippets.append( 

2768 ( 

2769 # Dataset type name doesn't match any existing dataset 

2770 # types. 

2771 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...), 

2772 ["nonexistent"], 

2773 ) 

2774 ) 

2775 for query, snippets in queries_and_snippets: 

2776 self.assertFalse(query.any(execute=False, exact=False)) 

2777 self.assertFalse(query.any(execute=True, exact=False)) 

2778 self.assertFalse(query.any(execute=True, exact=True)) 

2779 self.assertEqual(query.count(exact=False), 0) 

2780 self.assertEqual(query.count(exact=True), 0) 

2781 messages = list(query.explain_no_results()) 

2782 self.assertTrue(messages) 

2783 # Want all expected snippets to appear in at least one message. 

2784 self.assertTrue( 

2785 any( 

2786 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2787 ), 

2788 messages, 

2789 ) 

2790 

2791 # This query does yield results, but should also emit a warning because 

2792 # passing dataset type patterns to queryDataIds is deprecated; just look 

2793 # the warning. 

2794 with self.assertWarns(FutureWarning): 

2795 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...) 

2796 

2797 # These queries yield no results due to problems that can be identified 

2798 # by cheap follow-up queries, yielding helpful diagnostics. 

2799 for query, snippets in [ 

2800 ( 

2801 # No records for one of the involved dimensions. 

2802 registry.queryDataIds(["subfilter"]), 

2803 ["no rows", "subfilter"], 

2804 ), 

2805 ( 

2806 # No records for one of the involved dimensions. 

2807 registry.queryDimensionRecords("subfilter"), 

2808 ["no rows", "subfilter"], 

2809 ), 

2810 ]: 

2811 self.assertFalse(query.any(execute=True, exact=False)) 

2812 self.assertFalse(query.any(execute=True, exact=True)) 

2813 self.assertEqual(query.count(exact=True), 0) 

2814 messages = list(query.explain_no_results()) 

2815 self.assertTrue(messages) 

2816 # Want all expected snippets to appear in at least one message. 

2817 self.assertTrue( 

2818 any( 

2819 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2820 ), 

2821 messages, 

2822 ) 

2823 

2824 # This query yields four overlaps in the database, but one is filtered 

2825 # out in postprocessing. The count queries aren't accurate because 

2826 # they don't account for duplication that happens due to an internal 

2827 # join against commonSkyPix. 

2828 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2829 self.assertEqual( 

2830 { 

2831 DataCoordinate.standardize( 

2832 instrument="Cam1", 

2833 skymap="SkyMap1", 

2834 visit=v, 

2835 tract=t, 

2836 universe=registry.dimensions, 

2837 ) 

2838 for v, t in [(1, 0), (2, 0), (2, 1)] 

2839 }, 

2840 set(query3), 

2841 ) 

2842 self.assertTrue(query3.any(execute=False, exact=False)) 

2843 self.assertTrue(query3.any(execute=True, exact=False)) 

2844 self.assertTrue(query3.any(execute=True, exact=True)) 

2845 self.assertGreaterEqual(query3.count(exact=False), 4) 

2846 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3) 

2847 self.assertFalse(list(query3.explain_no_results())) 

2848 # This query yields overlaps in the database, but all are filtered 

2849 # out in postprocessing. The count queries again aren't very useful. 

2850 # We have to use `where=` here to avoid an optimization that 

2851 # (currently) skips the spatial postprocess-filtering because it 

2852 # recognizes that no spatial join is necessary. That's not ideal, but 

2853 # fixing it is out of scope for this ticket. 

2854 query4 = registry.queryDataIds( 

2855 ["visit", "tract"], 

2856 instrument="Cam1", 

2857 skymap="SkyMap1", 

2858 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2859 ) 

2860 self.assertFalse(set(query4)) 

2861 self.assertTrue(query4.any(execute=False, exact=False)) 

2862 self.assertTrue(query4.any(execute=True, exact=False)) 

2863 self.assertFalse(query4.any(execute=True, exact=True)) 

2864 self.assertGreaterEqual(query4.count(exact=False), 1) 

2865 self.assertEqual(query4.count(exact=True, discard=True), 0) 

2866 messages = query4.explain_no_results() 

2867 self.assertTrue(messages) 

2868 self.assertTrue(any("overlap" in message for message in messages)) 

2869 # This query should yield results from one dataset type but not the 

2870 # other, which is not registered. 

2871 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"]) 

2872 self.assertTrue(set(query5)) 

2873 self.assertTrue(query5.any(execute=False, exact=False)) 

2874 self.assertTrue(query5.any(execute=True, exact=False)) 

2875 self.assertTrue(query5.any(execute=True, exact=True)) 

2876 self.assertGreaterEqual(query5.count(exact=False), 1) 

2877 self.assertGreaterEqual(query5.count(exact=True), 1) 

2878 self.assertFalse(list(query5.explain_no_results())) 

2879 # This query applies a selection that yields no results, fully in the 

2880 # database. Explaining why it fails involves traversing the relation 

2881 # tree and running a LIMIT 1 query at each level that has the potential 

2882 # to remove rows. 

2883 query6 = registry.queryDimensionRecords( 

2884 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1" 

2885 ) 

2886 self.assertEqual(query6.count(exact=True), 0) 

2887 messages = query6.explain_no_results() 

2888 self.assertTrue(messages) 

2889 self.assertTrue(any("no-purpose" in message for message in messages)) 

2890 
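
# --- Editor's sketch (not part of the original test module) ---------------
# A hypothetical consumer of the summary methods tested above. Note that
# any(execute=False, exact=False) may report True for a query that is in
# fact empty, so the exact form is checked before collecting diagnostics.
def explain_if_empty(query):
    if query.any(execute=True, exact=True):
        return []
    return list(query.explain_no_results())
# ---------------------------------------------------------------------------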

2891 def testQueryDataIdsExpressionError(self): 

2892 """Test error checking of 'where' expressions in queryDataIds.""" 

2893 registry = self.makeRegistry() 

2894 self.loadData(registry, "base.yaml") 

2895 bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")} 

2896 with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."): 

2897 registry.queryDataIds(["detector"], where="foo.bar = 12") 

2898 with self.assertRaisesRegex( 

2899 LookupError, "Dimension element name cannot be inferred in this context." 

2900 ): 

2901 registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind) 

2902 

2903 def testQueryDataIdsOrderBy(self): 

2904 """Test order_by and limit on result returned by queryDataIds().""" 

2905 registry = self.makeRegistry() 

2906 self.loadData(registry, "base.yaml") 

2907 self.loadData(registry, "datasets.yaml") 

2908 self.loadData(registry, "spatial.yaml") 

2909 

2910 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

2911 return registry.queryDataIds( 

2912 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

2913 ) 

2914 

2915 Test = namedtuple( 

2916 "testQueryDataIdsOrderByTest", 

2917 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

2918 defaults=(None, None, None), 

2919 ) 

2920 

2921 test_data = ( 

2922 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2923 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

2924 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

2925 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

2926 Test( 

2927 "tract.id,visit.id", 

2928 "tract,visit", 

2929 ((0, 1), (0, 1), (0, 2)), 

2930 limit=(3,), 

2931 ), 

2932 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

2933 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

2934 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

2935 Test( 

2936 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

2937 ), 

2938 Test( 

2939 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

2940 ), 

2941 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2942 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2943 Test( 

2944 "tract,-timespan.begin,timespan.end", 

2945 "tract,visit", 

2946 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

2947 ), 

2948 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

2949 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

2950 Test( 

2951 "tract,detector", 

2952 "tract,detector", 

2953 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2954 datasets="flat", 

2955 collections="imported_r", 

2956 ), 

2957 Test( 

2958 "tract,detector.full_name", 

2959 "tract,detector", 

2960 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2961 datasets="flat", 

2962 collections="imported_r", 

2963 ), 

2964 Test( 

2965 "tract,detector.raft,detector.name_in_raft", 

2966 "tract,detector", 

2967 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2968 datasets="flat", 

2969 collections="imported_r", 

2970 ), 

2971 ) 

2972 

2973 for test in test_data: 

2974 order_by = test.order_by.split(",") 

2975 keys = test.keys.split(",") 

2976 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

2977 if test.limit is not None: 

2978 query = query.limit(*test.limit) 

2979 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

2980 self.assertEqual(dataIds, test.result) 

2981 

2982 # and materialize 

2983 query = do_query(keys).order_by(*order_by) 

2984 if test.limit is not None: 

2985 query = query.limit(*test.limit) 

2986 with self.assertRaises(RelationalAlgebraError): 

2987 with query.materialize(): 

2988 pass 

2989 

2990 # errors in a name 

2991 for order_by in ("", "-"): 

2992 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2993 list(do_query().order_by(order_by)) 

2994 

2995 for order_by in ("undimension.name", "-undimension.name"): 

2996 with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"): 

2997 list(do_query().order_by(order_by)) 

2998 

2999 for order_by in ("attract", "-attract"): 

3000 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

3001 list(do_query().order_by(order_by)) 

3002 

3003 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

3004 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

3005 

3006 with self.assertRaisesRegex( 

3007 ValueError, 

3008 r"Timespan exists in more than one dimension element \(exposure, visit\); " 

3009 r"qualify timespan with specific dimension name\.", 

3010 ): 

3011 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

3012 

3013 with self.assertRaisesRegex( 

3014 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

3015 ): 

3016 list(do_query("tract").order_by("timespan.begin")) 

3017 

3018 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

3019 list(do_query("tract").order_by("tract.timespan.begin")) 

3020 

3021 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

3022 list(do_query("tract").order_by("tract.name")) 

3023 

3024 with self.assertRaisesRegex( 

3025 ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?" 

3026 ): 

3027 list(do_query("visit").order_by("timestamp.begin")) 

3028 
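
# --- Editor's sketch (not part of the original test module) ---------------
# The order_by/limit idiom from the table above, as a hypothetical helper.
# order_by() and limit() return new result objects, so chain them before
# iterating; a leading "-" sorts descending, and fields present in several
# dimensions must be qualified (e.g. "visit.timespan.begin").
def first_n_visits(registry, instrument, n):
    query = registry.queryDataIds(["visit"], instrument=instrument)
    return [data_id["visit"] for data_id in query.order_by("visit").limit(n)]
# ---------------------------------------------------------------------------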

3029 def testQueryDataIdsGovernorExceptions(self): 

3030 """Test exceptions raised by queryDataIds() for incorrect governors.""" 

3031 registry = self.makeRegistry() 

3032 self.loadData(registry, "base.yaml") 

3033 self.loadData(registry, "datasets.yaml") 

3034 self.loadData(registry, "spatial.yaml") 

3035 

3036 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs): 

3037 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs) 

3038 

3039 Test = namedtuple( 

3040 "testQueryDataIdExceptionsTest", 

3041 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"), 

3042 defaults=(None, None, None, {}, None, 0), 

3043 ) 

3044 

3045 test_data = ( 

3046 Test("tract,visit", count=6), 

3047 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

3048 Test( 

3049 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError 

3050 ), 

3051 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

3052 Test( 

3053 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError 

3054 ), 

3055 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6), 

3056 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError), 

3057 Test( 

3058 "tract,visit", 

3059 where="instrument=cam AND skymap=map", 

3060 bind={"cam": "Cam1", "map": "SkyMap1"}, 

3061 count=6, 

3062 ), 

3063 Test( 

3064 "tract,visit", 

3065 where="instrument=cam AND skymap=map", 

3066 bind={"cam": "Cam", "map": "SkyMap"}, 

3067 exception=DataIdValueError, 

3068 ), 

3069 ) 

3070 

3071 for test in test_data: 

3072 dimensions = test.dimensions.split(",") 

3073 if test.exception: 

3074 with self.assertRaises(test.exception): 

3075 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() 

3076 else: 

3077 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3078 self.assertEqual(query.count(discard=True), test.count) 

3079 

3080 # and materialize 

3081 if test.exception: 

3082 with self.assertRaises(test.exception): 

3083 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3084 with query.materialize() as materialized: 

3085 materialized.count(discard=True) 

3086 else: 

3087 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

3088 with query.materialize() as materialized: 

3089 self.assertEqual(materialized.count(discard=True), test.count) 

3090 
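
# --- Editor's sketch (not part of the original test module) ---------------
# A hypothetical caller handling the governor validation tested above,
# assuming the registry subpackage re-exports DataIdValueError as the
# relative imports in this module suggest. Unknown governor values raise
# rather than silently yielding zero rows.
from lsst.daf.butler.registry import DataIdValueError

def count_tract_visit_pairs(registry, instrument, skymap):
    try:
        query = registry.queryDataIds(["tract", "visit"], instrument=instrument, skymap=skymap)
        return query.count(discard=True)
    except DataIdValueError:
        return 0
# ---------------------------------------------------------------------------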

3091 def testQueryDimensionRecordsOrderBy(self): 

3092 """Test order_by and limit on result returned by 

3093 queryDimensionRecords(). 

3094 """ 

3095 registry = self.makeRegistry() 

3096 self.loadData(registry, "base.yaml") 

3097 self.loadData(registry, "datasets.yaml") 

3098 self.loadData(registry, "spatial.yaml") 

3099 

3100 def do_query(element, datasets=None, collections=None): 

3101 return registry.queryDimensionRecords( 

3102 element, instrument="Cam1", datasets=datasets, collections=collections 

3103 ) 

3104 

3105 query = do_query("detector") 

3106 self.assertEqual(len(list(query)), 4) 

3107 

3108 Test = namedtuple( 

3109 "testQueryDataIdsOrderByTest", 

3110 ("element", "order_by", "result", "limit", "datasets", "collections"), 

3111 defaults=(None, None, None), 

3112 ) 

3113 

3114 test_data = ( 

3115 Test("detector", "detector", (1, 2, 3, 4)), 

3116 Test("detector", "-detector", (4, 3, 2, 1)), 

3117 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)), 

3118 Test("detector", "-detector.purpose", (4,), limit=(1,)), 

3119 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)), 

3120 Test("visit", "visit", (1, 2)), 

3121 Test("visit", "-visit.id", (2, 1)), 

3122 Test("visit", "zenith_angle", (1, 2)), 

3123 Test("visit", "-visit.name", (2, 1)), 

3124 Test("visit", "day_obs,-timespan.begin", (2, 1)), 

3125 ) 

3126 

3127 for test in test_data: 

3128 order_by = test.order_by.split(",") 

3129 query = do_query(test.element).order_by(*order_by) 

3130 if test.limit is not None: 

3131 query = query.limit(*test.limit) 

3132 dataIds = tuple(rec.id for rec in query) 

3133 self.assertEqual(dataIds, test.result) 

3134 

3135 # errors in a name 

3136 for order_by in ("", "-"): 

3137 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

3138 list(do_query("detector").order_by(order_by)) 

3139 

3140 for order_by in ("undimension.name", "-undimension.name"): 

3141 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"): 

3142 list(do_query("detector").order_by(order_by)) 

3143 

3144 for order_by in ("attract", "-attract"): 

3145 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."): 

3146 list(do_query("detector").order_by(order_by)) 

3147 

3148 for order_by in ("timestamp.begin", "-timestamp.begin"): 

3149 with self.assertRaisesRegex( 

3150 ValueError, 

3151 r"Element name mismatch: 'timestamp' instead of 'visit'; " 

3152 r"perhaps you meant 'timespan.begin'\?", 

3153 ): 

3154 list(do_query("visit").order_by(order_by)) 

3155 

3156 def testQueryDimensionRecordsExceptions(self): 

3157 """Test exceptions raised by queryDimensionRecords().""" 

3158 registry = self.makeRegistry() 

3159 self.loadData(registry, "base.yaml") 

3160 self.loadData(registry, "datasets.yaml") 

3161 self.loadData(registry, "spatial.yaml") 

3162 

3163 result = registry.queryDimensionRecords("detector") 

3164 self.assertEqual(result.count(), 4) 

3165 result = registry.queryDimensionRecords("detector", instrument="Cam1") 

3166 self.assertEqual(result.count(), 4) 

3167 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"}) 

3168 self.assertEqual(result.count(), 4) 

3169 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'") 

3170 self.assertEqual(result.count(), 4) 

3171 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"}) 

3172 self.assertEqual(result.count(), 4) 

3173 

3174 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3175 result = registry.queryDimensionRecords("detector", instrument="NotCam1") 

3176 result.count() 

3177 

3178 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3179 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) 

3180 result.count() 

3181 

3182 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3183 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") 

3184 result.count() 

3185 

3186 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3187 result = registry.queryDimensionRecords( 

3188 "detector", where="instrument=instr", bind={"instr": "NotCam1"} 

3189 ) 

3190 result.count() 

3191 

3192 def testDatasetConstrainedDimensionRecordQueries(self): 

3193 """Test that queryDimensionRecords works even when given a dataset 

3194 constraint whose dimensions extend beyond the requested dimension 

3195 element's. 

3196 """ 

3197 registry = self.makeRegistry() 

3198 self.loadData(registry, "base.yaml") 

3199 self.loadData(registry, "datasets.yaml") 

3200 # Query for physical_filter dimension records, using a dataset type 

3201 # whose dimensions include physical_filter as well as others (detector). 

3202 records = registry.queryDimensionRecords( 

3203 "physical_filter", 

3204 datasets=["flat"], 

3205 collections="imported_r", 

3206 ) 

3207 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"}) 

3208 # Trying to constrain by all dataset types is an error. 

3209 with self.assertRaises(TypeError): 

3210 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r")) 

3211 

3212 def testSkyPixDatasetQueries(self): 

3213 """Test that we can build queries involving skypix dimensions as long 

3214 as a dataset type that uses those dimensions is included. 

3215 """ 

3216 registry = self.makeRegistry() 

3217 self.loadData(registry, "base.yaml") 

3218 dataset_type = DatasetType( 

3219 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int" 

3220 ) 

3221 registry.registerDatasetType(dataset_type) 

3222 run = "r" 

3223 registry.registerRun(run) 

3224 # First try queries where there are no datasets; the concern is whether 

3225 # we can even build and execute these queries without raising, even 

3226 # when "doomed" query shortcuts are in play. 

3227 self.assertFalse( 

3228 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)) 

3229 ) 

3230 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run))) 

3231 # Now add a dataset and see that we can get it back. 

3232 htm7 = registry.dimensions.skypix["htm"][7].pixelization 

3233 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0]) 

3234 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run) 

3235 self.assertEqual( 

3236 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)), 

3237 {data_id}, 

3238 ) 

3239 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref}) 

3240 

3241 def testDatasetIdFactory(self): 

3242 """Simple test for DatasetIdFactory, mostly to catch potential changes 

3243 in its API. 

3244 """ 

3245 registry = self.makeRegistry() 

3246 factory = DatasetIdFactory() 

3247 dataset_type = DatasetType( 

3248 "datasetType", 

3249 dimensions=["detector", "instrument"], 

3250 universe=registry.dimensions, 

3251 storageClass="int", 

3252 ) 

3253 run = "run" 

3254 data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions) 

3255 

3256 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE) 

3257 self.assertIsInstance(datasetId, uuid.UUID) 

3258 self.assertEqual(datasetId.version, 4) 

3259 

3260 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE) 

3261 self.assertIsInstance(datasetId, uuid.UUID) 

3262 self.assertEqual(datasetId.version, 5) 

3263 

3264 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN) 

3265 self.assertIsInstance(datasetId, uuid.UUID) 

3266 self.assertEqual(datasetId.version, 5) 

3267 
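
# --- Editor's sketch (not part of the original test module) ---------------
# What the UUID versions checked above imply in practice: DATAID_TYPE_RUN
# hashes run + dataset type + data ID into a deterministic version-5 UUID,
# so repeating an identical ingest reproduces the same ID, while UNIQUE
# draws a random version-4 UUID. The helper name is hypothetical.
from lsst.daf.butler import DatasetIdGenEnum

def reproducible_dataset_id(factory, run, dataset_type, data_id):
    return factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
# ---------------------------------------------------------------------------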

3268 def testExposureQueries(self): 

3269 """Test query methods using arguments sourced from the exposure log 

3270 service. 

3271 

3272 The most complete test dataset currently available to daf_butler tests 

3273 is the hsc-rc2-subset.yaml export (which is unfortunately distinct from 

3274 the lsst/rc2_subset GitHub repo), but that does not have 'exposure' 

3275 dimension records as it was focused on providing nontrivial spatial 

3276 overlaps between visit+detector and tract+patch. So in this test we 

3277 need to translate queries that originally used the exposure dimension 

3278 to use the (very similar) visit dimension instead. 

3279 """ 

3280 registry = self.makeRegistry() 

3281 self.loadData(registry, "hsc-rc2-subset.yaml") 

3282 self.assertEqual( 

3283 [ 

3284 record.id 

3285 for record in registry.queryDimensionRecords("visit", instrument="HSC") 

3286 .order_by("id") 

3287 .limit(5) 

3288 ], 

3289 [318, 322, 326, 330, 332], 

3290 ) 

3291 self.assertEqual( 

3292 [ 

3293 data_id["visit"] 

3294 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("id").limit(5) 

3295 ], 

3296 [318, 322, 326, 330, 332], 

3297 ) 

3298 self.assertEqual( 

3299 [ 

3300 record.id 

3301 for record in registry.queryDimensionRecords("detector", instrument="HSC") 

3302 .order_by("full_name") 

3303 .limit(5) 

3304 ], 

3305 [73, 72, 71, 70, 65], 

3306 ) 

3307 self.assertEqual( 

3308 [ 

3309 data_id["detector"] 

3310 for data_id in registry.queryDataIds(["detector"], instrument="HSC") 

3311 .order_by("full_name") 

3312 .limit(5) 

3313 ], 

3314 [73, 72, 71, 70, 65], 

3315 ) 

3316 

3317 def test_long_query_names(self) -> None: 

3318 """Test that queries involving very long names are handled correctly. 

3319 

3320 This is especially important for PostgreSQL, which truncates symbols 

3321 longer than 63 chars, but it's worth testing for all DBs. 

3322 """ 

3323 registry = self.makeRegistry() 

3324 name = "abcd" * 17 

3325 registry.registerDatasetType( 

3326 DatasetType( 

3327 name, 

3328 dimensions=(), 

3329 storageClass="Exposure", 

3330 universe=registry.dimensions, 

3331 ) 

3332 ) 

3333 # We need to search more than one collection that actually contains a 

3334 # matching dataset; otherwise an optimization makes findFirst=True a 

3335 # no-op and would sidestep the truncation bugs we want to exercise. 

3336 run1 = "run1" 

3337 registry.registerRun(run1) 

3338 run2 = "run2" 

3339 registry.registerRun(run2) 

3340 (ref1,) = registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run1) 

3341 registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run2) 

3342 self.assertEqual( 

3343 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)), 

3344 {ref1}, 

3345 ) 

3346 

3347 def test_skypix_constraint_queries(self) -> None: 

3348 """Test queries spatially constrained by a skypix data ID.""" 

3349 registry = self.makeRegistry() 

3350 self.loadData(registry, "hsc-rc2-subset.yaml") 

3351 patch_regions = { 

3352 (data_id["tract"], data_id["patch"]): data_id.region 

3353 for data_id in registry.queryDataIds(["patch"]).expanded() 

3354 } 

3355 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"] 

3356 # This check ensures the test doesn't become trivial due to a config 

3357 # change; if it does, just pick a different HTM level. 

3358 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix) 

3359 # Gather all skypix IDs that definitely overlap at least one of these 

3360 # patches. 

3361 relevant_skypix_ids = lsst.sphgeom.RangeSet() 

3362 for patch_region in patch_regions.values(): 

3363 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region) 

3364 # Look for a "nontrivial" skypix_id that overlaps at least one patch 

3365 # and does not overlap at least one other patch. 

3366 for skypix_id in itertools.chain.from_iterable( 

3367 range(begin, end) for begin, end in relevant_skypix_ids 

3368 ): 

3369 skypix_region = skypix_dimension.pixelization.pixel(skypix_id) 

3370 overlapping_patches = { 

3371 patch_key 

3372 for patch_key, patch_region in patch_regions.items() 

3373 if not patch_region.isDisjointFrom(skypix_region) 

3374 } 

3375 if overlapping_patches and overlapping_patches != patch_regions.keys(): 

3376 break 

3377 else: 

3378 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.") 

3379 self.assertEqual( 

3380 { 

3381 (data_id["tract"], data_id["patch"]) 

3382 for data_id in registry.queryDataIds( 

3383 ["patch"], 

3384 dataId={skypix_dimension.name: skypix_id}, 

3385 ) 

3386 }, 

3387 overlapping_patches, 

3388 ) 

3389 # Test that a three-way join that includes the common skypix system in 

3390 # the dimensions doesn't generate redundant join terms in the query. 

3391 full_data_ids = set( 

3392 registry.queryDataIds( 

3393 ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC" 

3394 ).expanded() 

3395 ) 

3396 self.assertGreater(len(full_data_ids), 0) 

3397 for data_id in full_data_ids: 

3398 self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region)) 

3399 self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region)) 

3400 

3401 def test_spatial_constraint_queries(self) -> None: 

3402 """Test queries in which one spatial dimension in the constraint (data 

3403 ID or ``where`` string) constrains a different spatial dimension in the 

3404 query result columns. 

3405 """ 

3406 registry = self.makeRegistry() 

3407 self.loadData(registry, "hsc-rc2-subset.yaml") 

3408 patch_regions = { 

3409 (data_id["tract"], data_id["patch"]): data_id.region 

3410 for data_id in registry.queryDataIds(["patch"]).expanded() 

3411 } 

3412 observation_regions = { 

3413 (data_id["visit"], data_id["detector"]): data_id.region 

3414 for data_id in registry.queryDataIds(["visit", "detector"]).expanded() 

3415 } 

3416 all_combos = { 

3417 (patch_key, observation_key) 

3418 for patch_key, observation_key in itertools.product(patch_regions, observation_regions) 

3419 } 

3420 overlapping_combos = { 

3421 (patch_key, observation_key) 

3422 for patch_key, observation_key in all_combos 

3423 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key]) 

3424 } 

3425 # Check a direct spatial join with no constraint first. 

3426 self.assertEqual( 

3427 { 

3428 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"])) 

3429 for data_id in registry.queryDataIds(["patch", "visit", "detector"]) 

3430 }, 

3431 overlapping_combos, 

3432 ) 

3433 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set) 

3434 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set) 

3435 for patch_key, observation_key in overlapping_combos: 

3436 overlaps_by_patch[patch_key].add(observation_key) 

3437 overlaps_by_observation[observation_key].add(patch_key) 

3438 # Find patches and observations that each overlap at least one member 

3439 # of the other set, but not all of them. 

3440 nontrivial_patch = next( 

3441 iter( 

3442 patch_key 

3443 for patch_key, observation_keys in overlaps_by_patch.items() 

3444 if observation_keys and observation_keys != observation_regions.keys() 

3445 ) 

3446 ) 

3447 nontrivial_observation = next( 

3448 iter( 

3449 observation_key 

3450 for observation_key, patch_keys in overlaps_by_observation.items() 

3451 if patch_keys and patch_keys != patch_regions.keys() 

3452 ) 

3453 ) 

3454 # Use the nontrivial patches and observations as constraints on the 

3455 # other dimensions in various ways, first via a 'where' expression. 

3456 # It's better in general to use 'bind' instead of f-strings, but these 

3457 # are all integers, so there are no quoting concerns. 

3458 self.assertEqual( 

3459 { 

3460 (data_id["visit"], data_id["detector"]) 

3461 for data_id in registry.queryDataIds( 

3462 ["visit", "detector"], 

3463 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}", 

3464 skymap="hsc_rings_v1", 

3465 ) 

3466 }, 

3467 overlaps_by_patch[nontrivial_patch], 

3468 ) 

3469 self.assertEqual( 

3470 { 

3471 (data_id["tract"], data_id["patch"]) 

3472 for data_id in registry.queryDataIds( 

3473 ["patch"], 

3474 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}", 

3475 instrument="HSC", 

3476 ) 

3477 }, 

3478 overlaps_by_observation[nontrivial_observation], 

3479 ) 

3480 # and then via the dataId argument. 

3481 self.assertEqual( 

3482 { 

3483 (data_id["visit"], data_id["detector"]) 

3484 for data_id in registry.queryDataIds( 

3485 ["visit", "detector"], 

3486 dataId={ 

3487 "tract": nontrivial_patch[0], 

3488 "patch": nontrivial_patch[1], 

3489 }, 

3490 skymap="hsc_rings_v1", 

3491 ) 

3492 }, 

3493 overlaps_by_patch[nontrivial_patch], 

3494 ) 

3495 self.assertEqual( 

3496 { 

3497 (data_id["tract"], data_id["patch"]) 

3498 for data_id in registry.queryDataIds( 

3499 ["patch"], 

3500 dataId={ 

3501 "visit": nontrivial_observation[0], 

3502 "detector": nontrivial_observation[1], 

3503 }, 

3504 instrument="HSC", 

3505 ) 

3506 }, 

3507 overlaps_by_observation[nontrivial_observation], 

3508 ) 

3509 

3510 def test_query_projection_drop_postprocessing(self) -> None: 

3511 """Test that projections and deduplications on query objects can 

3512 drop post-query region filtering to ensure the query remains in 

3513 the SQL engine. 

3514 """ 

3515 registry = self.makeRegistry() 

3516 self.loadData(registry, "base.yaml") 

3517 self.loadData(registry, "spatial.yaml") 

3518 

3519 def pop_transfer(tree: Relation) -> Relation: 

3520 """If a relation tree terminates with a transfer to a new engine, 

3521 return the relation prior to that transfer. If not, return the 

3522 original relation. 

3523 """ 

3524 match tree: 

3525 case Transfer(target=target): 

3526 return target 

3527 case _: 

3528 return tree 

3529 

3530 # There's no public way to get a Query object yet, so we get one from a 

3531 # DataCoordinateQueryResults private attribute. When a public API is 

3532 # available this test should use it. 

3533 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query 

3534 # We expect this query to terminate in the iteration engine originally, 

3535 # because region-filtering is necessary. 

3536 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine) 

3537 # If we deduplicate, we usually have to do that downstream of the 

3538 # filtering. That means the deduplication has to happen in the 

3539 # iteration engine. 

3540 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine) 

3541 # If we pass drop_postprocessing, we instead drop the region filtering 

3542 # so the deduplication can happen in SQL (though there might still be 

3543 # a transfer to iteration at the tail of the tree that we can ignore; 

3544 # that's what the pop_transfer takes care of here). 

3545 self.assertIsInstance( 

3546 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine, 

3547 sql.Engine, 

3548 ) 

3549 

3550 def test_query_find_datasets_drop_postprocessing(self) -> None: 

3551 """Test that DataCoordinateQueryResults.findDatasets avoids commutator 

3552 problems with the FindFirstDataset relation operation. 

3553 """ 

3554 # Setup: load some visit, tract, and patch records, and insert two 

3555 # datasets with dimensions {visit, patch}, with one in each of two 

3556 # RUN collections. 

3557 registry = self.makeRegistry() 

3558 self.loadData(registry, "base.yaml") 

3559 self.loadData(registry, "spatial.yaml") 

3560 storage_class = StorageClass("Warpy") 

3561 registry.storageClasses.registerStorageClass(storage_class) 

3562 dataset_type = DatasetType( 

3563 "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions 

3564 ) 

3565 registry.registerDatasetType(dataset_type) 

3566 (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1) 

3567 registry.registerRun("run1") 

3568 registry.registerRun("run2") 

3569 (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1") 

3570 (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2") 

3571 # Query for the dataset using queryDataIds(...).findDatasets(...) 

3572 # against only one of the two collections. This should work even 

3573 # though the relation returned by queryDataIds ends with 

3574 # iteration-engine region-filtering, because we can recognize before 

3575 # running the query that there is only one collection to search and 

3576 # hence the (default) findFirst=True is irrelevant, and joining in the 

3577 # dataset query commutes past the iteration-engine postprocessing. 

3578 query1 = registry.queryDataIds( 

3579 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"] 

3580 ) 

3581 self.assertEqual( 

3582 set(query1.findDatasets(dataset_type.name, collections=["run1"])), 

3583 {ref1}, 

3584 ) 

3585 # Query for the dataset using queryDataIds(...).findDatasets(...) 

3586 # against both collections. This can only work if the FindFirstDataset 

3587 # operation can be commuted past the iteration-engine options into SQL. 

3588 query2 = registry.queryDataIds( 

3589 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"] 

3590 ) 

3591 self.assertEqual( 

3592 set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])), 

3593 {ref2}, 

3594 ) 

3595 

3596 def test_query_empty_collections(self) -> None: 

3597 """Test for registry query methods with empty collections. The methods 

3598 should return an empty result set (or None when applicable) and provide 

3599 "doomed" diagnostics. 

3600 """ 

3601 registry = self.makeRegistry() 

3602 self.loadData(registry, "base.yaml") 

3603 self.loadData(registry, "datasets.yaml") 

3604 

3605 # Tests for registry.findDataset() 

3606 with self.assertRaises(NoDefaultCollectionError): 

3607 registry.findDataset("bias", instrument="Cam1", detector=1) 

3608 self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...)) 

3609 self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[])) 

3610 

3611 # Tests for registry.queryDatasets() 

3612 with self.assertRaises(NoDefaultCollectionError): 

3613 registry.queryDatasets("bias") 

3614 self.assertTrue(list(registry.queryDatasets("bias", collections=...))) 

3615 

3616 result = registry.queryDatasets("bias", collections=[]) 

3617 self.assertEqual(len(list(result)), 0) 

3618 messages = list(result.explain_no_results()) 

3619 self.assertTrue(messages) 

3620 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3621 

3622 # Tests for registry.queryDataIds() 

3623 with self.assertRaises(NoDefaultCollectionError): 

3624 registry.queryDataIds("detector", datasets="bias") 

3625 self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...))) 

3626 

3627 result = registry.queryDataIds("detector", datasets="bias", collections=[]) 

3628 self.assertEqual(len(list(result)), 0) 

3629 messages = list(result.explain_no_results()) 

3630 self.assertTrue(messages) 

3631 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3632 

3633 # Tests for registry.queryDimensionRecords() 

3634 with self.assertRaises(NoDefaultCollectionError): 

3635 registry.queryDimensionRecords("detector", datasets="bias") 

3636 self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...))) 

3637 

3638 result = registry.queryDimensionRecords("detector", datasets="bias", collections=[]) 

3639 self.assertEqual(len(list(result)), 0) 

3640 messages = list(result.explain_no_results()) 

3641 self.assertTrue(messages) 

3642 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3643 
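
# --- Editor's sketch (not part of the original test module) ---------------
# The calling convention the tests above establish, as a hypothetical
# helper: an explicitly empty collection list is legal (queries come back
# empty with "doomed" diagnostics, findDataset returns None); only omitting
# collections with no registry default raises NoDefaultCollectionError.
def find_bias_or_none(registry, collections):
    return registry.findDataset("bias", instrument="Cam1", detector=1, collections=collections)
# ---------------------------------------------------------------------------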

3644 def test_dataset_followup_spatial_joins(self) -> None: 

3645 """Test queryDataIds(...).findRelatedDatasets(...) where a spatial join 

3646 is involved. 

3647 """ 

3648 registry = self.makeRegistry() 

3649 self.loadData(registry, "base.yaml") 

3650 self.loadData(registry, "spatial.yaml") 

3651 pvi_dataset_type = DatasetType( 

3652 "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions 

3653 ) 

3654 registry.registerDatasetType(pvi_dataset_type) 

3655 collection = "datasets" 

3656 registry.registerRun(collection) 

3657 (pvi1,) = registry.insertDatasets( 

3658 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 1}], run=collection 

3659 ) 

3660 (pvi2,) = registry.insertDatasets( 

3661 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 2}], run=collection 

3662 ) 

3663 (pvi3,) = registry.insertDatasets( 

3664 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 3}], run=collection 

3665 ) 

3666 self.assertEqual( 

3667 set( 

3668 registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0) 

3669 .expanded() 

3670 .findRelatedDatasets("pvi", [collection]) 

3671 ), 

3672 { 

3673 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi1), 

3674 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi2), 

3675 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=1), pvi2), 

3676 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi1), 

3677 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi2), 

3678 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi3), 

3679 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=3), pvi2), 

3680 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=4), pvi3), 

3681 }, 

3682 )