Coverage for python/lsst/daf/butler/registry/tests/_registry.py: 5%

1495 statements  

coverage.py v7.3.2, created at 2023-10-25 15:14 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Iterator
from datetime import datetime, timedelta
from typing import TYPE_CHECKING

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetIdFactory,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    SkyPixDimension,
    StorageClass,
    Timespan,
    ddl,
)
from .._collection_summary import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: str | None = None
    """Name of the collections manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: str | dict[str, str] | None = None
    """Name or configuration dictionary of the datasets manager class.  If a
    subclass provides a value for this member, it overrides the name specified
    in the default configuration (`str` or `dict`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry | None:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with` is
            not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()
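    # A minimal concrete subclass might look like the sketch below.  This is
    # illustrative only: ``Registry.createFromConfig``, ``Registry.fromConfig``
    # and the ``self.root`` temporary directory are assumptions about the
    # surrounding test infrastructure, not part of this module.
    #
    #     class SqliteFileRegistryTests(RegistryTests, unittest.TestCase):
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data", "registry")
    #
    #         def makeRegistry(self, share_repo_with=None):
    #             config = self.makeRegistryConfig()
    #             config["db"] = f"sqlite:///{self.root}/gen3.sqlite3"
    #             if share_repo_with is None:
    #                 return Registry.createFromConfig(config, butlerRoot=self.root)
    #             # Re-open the same on-disk repository with a second client.
    #             return Registry.fromConfig(config, butlerRoot=self.root)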

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
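    # Typical usage (hypothetical data ID values, for illustration only):
    #
    #     self.checkQueryResults(
    #         registry.queryDataIds(["detector"], instrument="Cam1"),
    #         expected=[...],  # DataCoordinates we expect, in any order
    #     )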

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test very long IN clause which exceeds sqlite limit on number of
        # parameters. SQLite says the limit is 32k but it looks like it is
        # much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than 1k batch size, first with
        # duplicates, second has matching elements in different batches (after
        # sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the given
        keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, in which two have the right
        # dataset type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets-uuid.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset ID."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing same DatasetRef with different dataset ID is an error
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs, they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make dataset ref with reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
                self.assertEqual(ref1.id, ref.id)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref])
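    # Note on the two ID generation modes exercised above, as inferred from
    # the assertions (the exact hash inputs are an implementation detail):
    # both produce deterministic version-5 UUIDs, but DATAID_TYPE appears to
    # hash only the dataset type and data ID, so the same ref in a new run
    # yields the same UUID and conflicts, while DATAID_TYPE_RUN also folds in
    # the run name, which is why it can be re-imported into a different run.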

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be included
        # when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "flat", "bias.wcs", "flat.photoCalib"},
                NamedValueSet(registry.queryDatasetTypes(components=True)).names,
            )
        # Use a pattern that can match either parent or components. Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
            )
        # This pattern matches only a component. In this case we also return
        # that component dataset type if components=None.
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=None)).names,
            )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
            )
        # Add a dataset type using a StorageClass that we'll then remove; check
        # that this does not affect our ability to query for dataset types
        # (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={
                "data1": registry.storageClasses.getStorageClass("StructuredDataDict"),
                "data2": registry.storageClasses.getStorageClass("StructuredDataDict"),
            },
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertWarns(FutureWarning):
            with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
                everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that. So if the next line
        # fails (i.e. "temporary.data" _is_ in everything.names), it means
        # this part of the test isn't doing anything, because the _unregister
        # call above isn't simulating the real-life case we want it to
        # simulate, in which different versions of daf_butler in entirely
        # different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp". This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        with self.assertWarns(FutureWarning):
            dataIds = registry.queryDataIds(
                ["detector"],
                datasets=["bias.wcs"],
                collections=collection,
            ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        with self.assertWarns(FutureWarning):
            childRefs2 = set(
                registry.queryDatasets(
                    "bias.wcs",
                    collections=collection,
                )
            )
        self.assertEqual({ref.datasetType for ref in childRefs2}, {childType})
        self.assertEqual({ref.dataId for ref in childRefs2}, set(dataIds))

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # Searching for bias with dataId1 in chain2 should find it via tag1
        # (in chain1), recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2. It should also be found via
        # chain2, which reaches run2 both directly at its front and at the
        # end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap.
        """
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, this is not in the dimensions, but it
        # is a part of the full expression so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

1139 def testSkyMapDimensions(self): 

1140 """Tests involving only skymap dimensions, no joins to instrument.""" 

1141 registry = self.makeRegistry() 

1142 

1143 # need a bunch of dimensions and datasets for test, we want 

1144 # "band" in the test so also have to add physical_filter 

1145 # dimensions 

1146 registry.insertDimensionData("instrument", dict(instrument="DummyCam")) 

1147 registry.insertDimensionData( 

1148 "physical_filter", 

1149 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1150 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1151 ) 

1152 registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!")) 

1153 for tract in range(10): 

1154 registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract)) 

1155 registry.insertDimensionData( 

1156 "patch", 

1157 *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)], 

1158 ) 

1159 

1160 # dataset types 

1161 run = "tésτ" 

1162 registry.registerRun(run) 

1163 storageClass = StorageClass("testDataset") 

1164 registry.storageClasses.registerStorageClass(storageClass) 

1165 calexpType = DatasetType( 

1166 name="deepCoadd_calexp", 

1167 dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")), 

1168 storageClass=storageClass, 

1169 ) 

1170 registry.registerDatasetType(calexpType) 

1171 mergeType = DatasetType( 

1172 name="deepCoadd_mergeDet", 

1173 dimensions=registry.dimensions.extract(("skymap", "tract", "patch")), 

1174 storageClass=storageClass, 

1175 ) 

1176 registry.registerDatasetType(mergeType) 

1177 measType = DatasetType( 

1178 name="deepCoadd_meas", 

1179 dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")), 

1180 storageClass=storageClass, 

1181 ) 

1182 registry.registerDatasetType(measType) 

1183 

1184 dimensions = DimensionGraph( 

1185 registry.dimensions, 

1186 dimensions=( 

1187 calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required 

1188 ), 

1189 ) 

1190 

1191 # add pre-existing datasets 

1192 for tract in (1, 3, 5): 

1193 for patch in (2, 4, 6, 7): 

1194 dataId = dict(skymap="DummyMap", tract=tract, patch=patch) 

1195 registry.insertDatasets(mergeType, dataIds=[dataId], run=run) 

1196 for aFilter in ("i", "r"): 

1197 dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter) 

1198 registry.insertDatasets(calexpType, dataIds=[dataId], run=run) 

1199 

1200 # with empty expression 

1201 rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet() 

1202 self.assertEqual(len(rows), 3 * 4 * 2) # 4 tracts x 4 patches x 2 filters 

1203 for dataId in rows: 

1204 self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band")) 

1205 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5)) 

1206 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7)) 

1207 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r")) 

1208 

1209 # limit to 2 tracts and 2 patches 

1210 rows = registry.queryDataIds( 

1211 dimensions, 

1212 datasets=[calexpType, mergeType], 

1213 collections=run, 

1214 where="tract IN (1, 5) AND patch IN (2, 7)", 

1215 skymap="DummyMap", 

1216 ).toSet() 

1217 self.assertEqual(len(rows), 2 * 2 * 2) # 2 tracts x 2 patches x 2 filters 

1218 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5)) 

1219 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7)) 

1220 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r")) 

1221 

1222 # limit to single filter 

1223 rows = registry.queryDataIds( 

1224 dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'" 

1225 ).toSet() 

1226 self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter

1227 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5)) 

1228 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7)) 

1229 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",)) 

1230 

1231 # Specifying a non-existent skymap raises an exception.

1232 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

1233 rows = registry.queryDataIds( 

1234 dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'" 

1235 ).toSet() 

1236 
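    # A minimal sketch (not part of the original tests) of parameterizing
    # the `where` expressions above with `bind` instead of embedding
    # literals; the timespan and ingest-date tests later in this file use
    # the same mechanism.  The method and bind names are illustrative.
    def _sketchBindUsage(self, registry, dimensions, datasetTypes, run):
        return registry.queryDataIds(
            dimensions,
            datasets=datasetTypes,
            collections=run,
            where="band = my_band",
            bind={"my_band": "i"},
        ).toSet()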

1237 def testSpatialJoin(self): 

1238 """Test queries that involve spatial overlap joins.""" 

1239 registry = self.makeRegistry() 

1240 self.loadData(registry, "hsc-rc2-subset.yaml") 

1241 

1242 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of 

1243 # the TopologicalFamily they belong to. We'll relate all elements in 

1244 # each family to all of the elements in each other family. 

1245 families = defaultdict(set) 

1246 # Dictionary of {element.name: {dataId: region}}. 

1247 regions = {} 

1248 for element in registry.dimensions.getDatabaseElements(): 

1249 if element.spatial is not None: 

1250 families[element.spatial.name].add(element) 

1251 regions[element.name] = { 

1252 record.dataId: record.region for record in registry.queryDimensionRecords(element) 

1253 } 

1254 

1255 # If this check fails, it's not necessarily a problem - it may just be 

1256 # a reasonable change to the default dimension definitions - but the 

1257 # test below depends on there being more than one family to do anything 

1258 # useful. 

1259 self.assertEqual(len(families), 2) 

1260 

1261 # Overlap DatabaseDimensionElements with each other. 

1262 for family1, family2 in itertools.combinations(families, 2): 

1263 for element1, element2 in itertools.product(families[family1], families[family2]): 

1264 graph = DimensionGraph.union(element1.graph, element2.graph) 

1265 # Construct expected set of overlapping data IDs via a 

1266 # brute-force comparison of the regions we've already fetched. 

1267 expected = { 

1268 DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph) 

1269 for (dataId1, region1), (dataId2, region2) in itertools.product( 

1270 regions[element1.name].items(), regions[element2.name].items() 

1271 ) 

1272 if not region1.isDisjointFrom(region2) 

1273 } 

1274 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1275 queried = set(registry.queryDataIds(graph)) 

1276 self.assertEqual(expected, queried) 

1277 

1278 # Overlap each DatabaseDimensionElement with the commonSkyPix system. 

1279 commonSkyPix = registry.dimensions.commonSkyPix 

1280 for elementName, these_regions in regions.items(): 

1281 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph) 

1282 expected = set() 

1283 for dataId, region in these_regions.items(): 

1284 for begin, end in commonSkyPix.pixelization.envelope(region): 

1285 expected.update( 

1286 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph) 

1287 for index in range(begin, end) 

1288 ) 

1289 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1290 queried = set(registry.queryDataIds(graph)) 

1291 self.assertEqual(expected, queried) 

1292 
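    # A minimal sketch of the envelope expansion used by the brute-force
    # expectations above: a pixelization's envelope() returns a RangeSet of
    # half-open index ranges, which we flatten into individual pixels.
    @staticmethod
    def _sketchEnvelopeIndices(pixelization, region):
        indices = set()
        for begin, end in pixelization.envelope(region):
            indices.update(range(begin, end))
        return indices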

1293 def testAbstractQuery(self): 

1294 """Test that we can run a query that just lists the known 

1295 bands. This is tricky because band is 

1296 backed by a query against physical_filter. 

1297 """ 

1298 registry = self.makeRegistry() 

1299 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1300 registry.insertDimensionData( 

1301 "physical_filter", 

1302 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1303 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1304 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1305 ) 

1306 rows = registry.queryDataIds(["band"]).toSet() 

1307 self.assertCountEqual( 

1308 rows, 

1309 [ 

1310 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1311 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1312 ], 

1313 ) 

1314 
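    # A minimal sketch of what the assertion above amounts to: "band" has
    # no table of its own, so distinct bands are derived from the band
    # column of physical_filter.  Plain dicts like those inserted above
    # are assumed, purely for illustration.
    @staticmethod
    def _sketchDistinctBands(physical_filter_rows):
        return {row["band"] for row in physical_filter_rows}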

1315 def testAttributeManager(self): 

1316 """Test basic functionality of attribute manager.""" 

1317 # number of attributes with schema versions in a fresh database, 

1318 # 6 managers with 2 records per manager, plus config for dimensions 

1319 VERSION_COUNT = 6 * 2 + 1 

1320 

1321 registry = self.makeRegistry() 

1322 attributes = registry._managers.attributes 

1323 

1324 # check what get() returns for non-existing key 

1325 self.assertIsNone(attributes.get("attr")) 

1326 self.assertEqual(attributes.get("attr", ""), "") 

1327 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1328 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1329 

1330 # cannot store empty key or value 

1331 with self.assertRaises(ValueError): 

1332 attributes.set("", "value") 

1333 with self.assertRaises(ValueError): 

1334 attributes.set("attr", "") 

1335 

1336 # set value of non-existing key 

1337 attributes.set("attr", "value") 

1338 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1339 self.assertEqual(attributes.get("attr"), "value") 

1340 

1341 # update value of existing key 

1342 with self.assertRaises(ButlerAttributeExistsError): 

1343 attributes.set("attr", "value2") 

1344 

1345 attributes.set("attr", "value2", force=True) 

1346 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1347 self.assertEqual(attributes.get("attr"), "value2") 

1348 

1349 # delete existing key 

1350 self.assertTrue(attributes.delete("attr")) 

1351 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1352 

1353 # delete non-existing key 

1354 self.assertFalse(attributes.delete("non-attr")) 

1355 

1356 # store bunch of keys and get the list back 

1357 data = [ 

1358 ("version.core", "1.2.3"), 

1359 ("version.dimensions", "3.2.1"), 

1360 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1361 ] 

1362 for key, value in data: 

1363 attributes.set(key, value) 

1364 items = dict(attributes.items()) 

1365 for key, value in data: 

1366 self.assertEqual(items[key], value) 

1367 
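    # A minimal sketch of the set/force semantics exercised above, modeled
    # with a plain dict.  The real manager persists to a database table and
    # raises ButlerAttributeExistsError; the RuntimeError here is a
    # stand-in.
    @staticmethod
    def _sketchAttributeSet(store, key, value, force=False):
        if not key or not value:
            raise ValueError("empty keys and values are rejected")
        if key in store and not force:
            raise RuntimeError(f"attribute {key!r} already exists")
        store[key] = value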

1368 def testQueryDatasetsDeduplication(self): 

1369 """Test that the findFirst option to queryDatasets selects datasets 

1370 from collections in the order given.

1371 """ 

1372 registry = self.makeRegistry() 

1373 self.loadData(registry, "base.yaml") 

1374 self.loadData(registry, "datasets.yaml") 

1375 self.assertCountEqual( 

1376 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1377 [ 

1378 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1379 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1380 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1381 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1382 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1383 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1384 ], 

1385 ) 

1386 self.assertCountEqual( 

1387 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1388 [ 

1389 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1390 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1391 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1392 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1393 ], 

1394 ) 

1395 self.assertCountEqual( 

1396 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1397 [ 

1398 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1399 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1400 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1401 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1402 ], 

1403 ) 

1404 
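    # A minimal sketch of the findFirst semantics asserted above: for each
    # data ID, keep the dataset from the earliest collection in the search
    # order.  The plain-dict inputs are illustrative, not the Registry API.
    @staticmethod
    def _sketchFindFirst(refs_by_collection, collection_order):
        result = {}
        for collection in collection_order:
            for data_id, ref in refs_by_collection.get(collection, {}).items():
                # setdefault keeps the first (highest-priority) match.
                result.setdefault(data_id, ref)
        return list(result.values())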

1405 def testQueryResults(self): 

1406 """Test querying for data IDs and then manipulating the QueryResults 

1407 object returned to perform other queries. 

1408 """ 

1409 registry = self.makeRegistry() 

1410 self.loadData(registry, "base.yaml") 

1411 self.loadData(registry, "datasets.yaml") 

1412 bias = registry.getDatasetType("bias") 

1413 flat = registry.getDatasetType("flat") 

1414 # Obtain expected results from methods other than those we're testing 

1415 # here. That includes: 

1416 # - the dimensions of the data IDs we want to query: 

1417 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1418 # - the dimensions of some other data IDs we'll extract from that: 

1419 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1420 # - the data IDs we expect to obtain from the first queries: 

1421 expectedDataIds = DataCoordinateSet( 

1422 { 

1423 DataCoordinate.standardize( 

1424 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1425 ) 

1426 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1427 }, 

1428 graph=expectedGraph, 

1429 hasFull=False, 

1430 hasRecords=False, 

1431 ) 

1432 # - the flat datasets we expect to find from those data IDs, in just 

1433 # one collection (so deduplication is irrelevant): 

1434 expectedFlats = [ 

1435 registry.findDataset( 

1436 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1437 ), 

1438 registry.findDataset( 

1439 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1440 ), 

1441 registry.findDataset( 

1442 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1443 ), 

1444 ] 

1445 # - the data IDs we expect to extract from that: 

1446 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1447 # - the bias datasets we expect to find from those data IDs, after we 

1448 # subset-out the physical_filter dimension, both with duplicates: 

1449 expectedAllBiases = [ 

1450 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1451 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1452 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1453 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1454 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1455 ] 

1456 # - ...and without duplicates: 

1457 expectedDeduplicatedBiases = [ 

1458 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1459 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1460 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1461 ] 

1462 # Test against those expected results, using a "lazy" query for the 

1463 # data IDs (which re-executes that query each time we use it to do 

1464 # something new). 

1465 dataIds = registry.queryDataIds( 

1466 ["detector", "physical_filter"], 

1467 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1468 instrument="Cam1", 

1469 ) 

1470 self.assertEqual(dataIds.graph, expectedGraph) 

1471 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1472 self.assertCountEqual( 

1473 list( 

1474 dataIds.findDatasets( 

1475 flat, 

1476 collections=["imported_r"], 

1477 ) 

1478 ), 

1479 expectedFlats, 

1480 ) 

1481 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1482 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1483 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1484 self.assertCountEqual( 

1485 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1486 expectedAllBiases, 

1487 ) 

1488 self.assertCountEqual( 

1489 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1490 expectedDeduplicatedBiases, 

1491 ) 

1492 

1493 # A dataset type whose dimensions don't match the data IDs should raise.

1494 with self.assertRaises(ValueError): 

1495 subsetDataIds.findDatasets("flat", collections=["imported_r", "imported_g"], findFirst=True) 

1496 

1497 # Use a component dataset type. 

1498 self.assertCountEqual( 

1499 [ 

1500 ref.makeComponentRef("image") 

1501 for ref in subsetDataIds.findDatasets( 

1502 bias, 

1503 collections=["imported_r", "imported_g"], 

1504 findFirst=False, 

1505 ) 

1506 ], 

1507 [ref.makeComponentRef("image") for ref in expectedAllBiases], 

1508 ) 

1509 

1510 # Use a named dataset type that does not exist and a dataset type 

1511 # object that does not exist. 

1512 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure") 

1513 

1514 # Test both string name and dataset type object. 

1515 test_type: str | DatasetType 

1516 for test_type, test_type_name in ( 

1517 (unknown_type, unknown_type.name), 

1518 (unknown_type.name, unknown_type.name), 

1519 ): 

1520 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name): 

1521 list( 

1522 subsetDataIds.findDatasets( 

1523 test_type, collections=["imported_r", "imported_g"], findFirst=True 

1524 ) 

1525 ) 

1526 

1527 # Materialize the bias dataset queries (only) by putting the results 

1528 # into temporary tables, then repeat those tests. 

1529 with subsetDataIds.findDatasets( 

1530 bias, collections=["imported_r", "imported_g"], findFirst=False 

1531 ).materialize() as biases: 

1532 self.assertCountEqual(list(biases), expectedAllBiases) 

1533 with subsetDataIds.findDatasets( 

1534 bias, collections=["imported_r", "imported_g"], findFirst=True 

1535 ).materialize() as biases: 

1536 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1537 # Materialize the data ID subset query, but not the dataset queries. 

1538 with subsetDataIds.materialize() as subsetDataIds: 

1539 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1540 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1541 self.assertCountEqual( 

1542 list( 

1543 subsetDataIds.findDatasets( 

1544 bias, collections=["imported_r", "imported_g"], findFirst=False 

1545 ) 

1546 ), 

1547 expectedAllBiases, 

1548 ) 

1549 self.assertCountEqual( 

1550 list( 

1551 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1552 ), 

1553 expectedDeduplicatedBiases, 

1554 ) 

1555 # Materialize the dataset queries, too. 

1556 with subsetDataIds.findDatasets( 

1557 bias, collections=["imported_r", "imported_g"], findFirst=False 

1558 ).materialize() as biases: 

1559 self.assertCountEqual(list(biases), expectedAllBiases) 

1560 with subsetDataIds.findDatasets( 

1561 bias, collections=["imported_r", "imported_g"], findFirst=True 

1562 ).materialize() as biases: 

1563 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1564 # Materialize the original query, but none of the follow-up queries. 

1565 with dataIds.materialize() as dataIds: 

1566 self.assertEqual(dataIds.graph, expectedGraph) 

1567 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1568 self.assertCountEqual( 

1569 list( 

1570 dataIds.findDatasets( 

1571 flat, 

1572 collections=["imported_r"], 

1573 ) 

1574 ), 

1575 expectedFlats, 

1576 ) 

1577 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1578 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1579 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1580 self.assertCountEqual( 

1581 list( 

1582 subsetDataIds.findDatasets( 

1583 bias, collections=["imported_r", "imported_g"], findFirst=False 

1584 ) 

1585 ), 

1586 expectedAllBiases, 

1587 ) 

1588 self.assertCountEqual( 

1589 list( 

1590 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1591 ), 

1592 expectedDeduplicatedBiases, 

1593 ) 

1594 # Materialize just the bias dataset queries. 

1595 with subsetDataIds.findDatasets( 

1596 bias, collections=["imported_r", "imported_g"], findFirst=False 

1597 ).materialize() as biases: 

1598 self.assertCountEqual(list(biases), expectedAllBiases) 

1599 with subsetDataIds.findDatasets( 

1600 bias, collections=["imported_r", "imported_g"], findFirst=True 

1601 ).materialize() as biases: 

1602 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1603 # Materialize the subset data ID query, but not the dataset 

1604 # queries. 

1605 with subsetDataIds.materialize() as subsetDataIds: 

1606 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1607 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1608 self.assertCountEqual( 

1609 list( 

1610 subsetDataIds.findDatasets( 

1611 bias, collections=["imported_r", "imported_g"], findFirst=False 

1612 ) 

1613 ), 

1614 expectedAllBiases, 

1615 ) 

1616 self.assertCountEqual( 

1617 list( 

1618 subsetDataIds.findDatasets( 

1619 bias, collections=["imported_r", "imported_g"], findFirst=True 

1620 ) 

1621 ), 

1622 expectedDeduplicatedBiases, 

1623 ) 

1624 # Materialize the bias dataset queries, too, so now we're 

1625 # materializing every single step. 

1626 with subsetDataIds.findDatasets( 

1627 bias, collections=["imported_r", "imported_g"], findFirst=False 

1628 ).materialize() as biases: 

1629 self.assertCountEqual(list(biases), expectedAllBiases) 

1630 with subsetDataIds.findDatasets( 

1631 bias, collections=["imported_r", "imported_g"], findFirst=True 

1632 ).materialize() as biases: 

1633 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1634 
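    # The materialize() pattern exercised above, reduced to its minimal
    # form: each call writes the current results into a temporary table
    # that follow-up queries reuse instead of re-executing the original
    # query, and the table lives only for the duration of the `with` block.
    # A sketch, with an arbitrary dimension and collection:
    def _sketchMaterialize(self, registry):
        with registry.queryDataIds(["detector"]).materialize() as data_ids:
            return list(
                data_ids.findDatasets("bias", collections=["imported_g"], findFirst=True)
            )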

1635 def testStorageClassPropagation(self): 

1636 """Test that queries for datasets respect the storage class passed in 

1637 as part of a full dataset type. 

1638 """ 

1639 registry = self.makeRegistry() 

1640 self.loadData(registry, "base.yaml") 

1641 dataset_type_in_registry = DatasetType( 

1642 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions 

1643 ) 

1644 registry.registerDatasetType(dataset_type_in_registry) 

1645 run = "run1" 

1646 registry.registerRun(run) 

1647 (inserted_ref,) = registry.insertDatasets( 

1648 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run 

1649 ) 

1650 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry) 

1651 query_dataset_type = DatasetType( 

1652 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions 

1653 ) 

1654 self.assertNotEqual(dataset_type_in_registry, query_dataset_type) 

1655 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run]) 

1656 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore 

1657 (query_datasets_ref,) = query_datasets_result 

1658 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type) 

1659 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets( 

1660 query_dataset_type, collections=[run] 

1661 ) 

1662 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type) 

1663 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result 

1664 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type) 

1665 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type) 

1666 self.assertEqual(list(query_dataset_types_result), [query_dataset_type]) 

1667 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run]) 

1668 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type) 

1669 

1670 def testEmptyDimensionsQueries(self): 

1671 """Test Query and QueryResults objects in the case where there are no 

1672 dimensions. 

1673 """ 

1674 # Set up test data: one dataset type, two runs, one dataset in each. 

1675 registry = self.makeRegistry() 

1676 self.loadData(registry, "base.yaml") 

1677 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1678 registry.registerDatasetType(schema) 

1679 dataId = DataCoordinate.makeEmpty(registry.dimensions) 

1680 run1 = "run1" 

1681 run2 = "run2" 

1682 registry.registerRun(run1) 

1683 registry.registerRun(run2) 

1684 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1685 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1686 # Query directly for both of the datasets, and each one, one at a time. 

1687 self.checkQueryResults( 

1688 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1689 ) 

1690 self.checkQueryResults( 

1691 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1692 [dataset1], 

1693 ) 

1694 self.checkQueryResults( 

1695 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1696 [dataset2], 

1697 ) 

1698 # Query for data IDs with no dimensions. 

1699 dataIds = registry.queryDataIds([]) 

1700 self.checkQueryResults(dataIds, [dataId]) 

1701 # Use queried data IDs to find the datasets. 

1702 self.checkQueryResults( 

1703 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1704 [dataset1, dataset2], 

1705 ) 

1706 self.checkQueryResults( 

1707 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1708 [dataset1], 

1709 ) 

1710 self.checkQueryResults( 

1711 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1712 [dataset2], 

1713 ) 

1714 # Now materialize the data ID query results and repeat those tests. 

1715 with dataIds.materialize() as dataIds: 

1716 self.checkQueryResults(dataIds, [dataId]) 

1717 self.checkQueryResults( 

1718 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1719 [dataset1], 

1720 ) 

1721 self.checkQueryResults( 

1722 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1723 [dataset2], 

1724 ) 

1725 # Query for non-empty data IDs, then subset that to get the empty one. 

1726 # Repeat the above tests starting from that. 

1727 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1728 self.checkQueryResults(dataIds, [dataId]) 

1729 self.checkQueryResults( 

1730 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1731 [dataset1, dataset2], 

1732 ) 

1733 self.checkQueryResults( 

1734 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1735 [dataset1], 

1736 ) 

1737 self.checkQueryResults( 

1738 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1739 [dataset2], 

1740 ) 

1741 with dataIds.materialize() as dataIds: 

1742 self.checkQueryResults(dataIds, [dataId]) 

1743 self.checkQueryResults( 

1744 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1745 [dataset1, dataset2], 

1746 ) 

1747 self.checkQueryResults( 

1748 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1749 [dataset1], 

1750 ) 

1751 self.checkQueryResults( 

1752 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1753 [dataset2], 

1754 ) 

1755 # Query for non-empty data IDs, then materialize, then subset to get 

1756 # the empty one. Repeat again. 

1757 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1758 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1759 self.checkQueryResults(dataIds, [dataId]) 

1760 self.checkQueryResults( 

1761 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1762 [dataset1, dataset2], 

1763 ) 

1764 self.checkQueryResults( 

1765 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1766 [dataset1], 

1767 ) 

1768 self.checkQueryResults( 

1769 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1770 [dataset2], 

1771 ) 

1772 with dataIds.materialize() as dataIds: 

1773 self.checkQueryResults(dataIds, [dataId]) 

1774 self.checkQueryResults( 

1775 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1776 [dataset1, dataset2], 

1777 ) 

1778 self.checkQueryResults( 

1779 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1780 [dataset1], 

1781 ) 

1782 self.checkQueryResults( 

1783 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1784 [dataset2], 

1785 ) 

1786 # Query for non-empty data IDs with a constraint on an empty-data-ID 

1787 # dataset that exists. 

1788 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...) 

1789 self.checkQueryResults( 

1790 dataIds.subset(unique=True), 

1791 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)], 

1792 ) 

1793 # Again query for non-empty data IDs with a constraint on empty-data-ID 

1794 # datasets, but when the datasets don't exist. We delete the existing 

1795 # dataset and query just that collection rather than creating a new 

1796 # empty collection because this is a bit less likely for our build-time 

1797 # logic to short-circuit (via the collection summaries), and such a

1798 # shortcut would make this test a bit more trivial than we'd like. 

1799 registry.removeDatasets([dataset2]) 

1800 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2) 

1801 self.checkQueryResults(dataIds, []) 

1802 

1803 def testDimensionDataModifications(self): 

1804 """Test that modifying dimension records via: 

1805 syncDimensionData(..., update=True) and 

1806 insertDimensionData(..., replace=True) works as expected, even in the 

1807 presence of datasets using those dimensions and spatial overlap 

1808 relationships. 

1809 """ 

1810 

1811 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1812 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1813 for begin, end in ranges: 

1814 yield from range(begin, end) 

1815 

1816 def range_set_hull( 

1817 ranges: lsst.sphgeom.RangeSet, 

1818 pixelization: lsst.sphgeom.HtmPixelization, 

1819 ) -> lsst.sphgeom.ConvexPolygon: 

1820 """Create a ConvexPolygon hull of the region defined by a set of 

1821 HTM pixelization index ranges. 

1822 """ 

1823 points = [] 

1824 for index in unpack_range_set(ranges): 

1825 points.extend(pixelization.triangle(index).getVertices()) 

1826 return lsst.sphgeom.ConvexPolygon(points) 

1827 

1828 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1829 # and four child regions (the trixels within the parent at the next 

1830 # level. We'll use the parent as a tract/visit region and the children 

1831 # as its patch/visit_detector regions. 

1832 registry = self.makeRegistry() 

1833 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1834 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1835 index = 12288 

1836 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 

1837 assert htm6.universe().contains(child_ranges_small) 

1838 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)] 

1839 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1840 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1841 ) 

1842 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1843 # Make a larger version of each child region, defined to be the set of 

1844 # htm6 trixels that overlap the original's bounding circle. Make a new 

1845 # parent that's the convex hull of the new children. 

1846 child_regions_large = [ 

1847 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1848 ] 

1849 assert all( 

1850 large.contains(small) 

1851 for large, small in zip(child_regions_large, child_regions_small, strict=True) 

1852 ) 

1853 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1854 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1855 ) 

1856 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1857 assert parent_region_large.contains(parent_region_small) 

1858 assert not parent_region_small.contains(parent_region_large) 

1859 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1860 # Find some commonSkyPix indices that overlap the large regions but do

1861 # not overlap the small regions. We use commonSkyPix here to make sure the

1862 # real tests later involve what's in the database, not just post-query 

1863 # filtering of regions. 

1864 child_difference_indices = [] 

1865 for large, small in zip(child_regions_large, child_regions_small, strict=True): 

1866 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1867 assert difference, "if this is empty, we can't test anything useful with these regions" 

1868 assert all( 

1869 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1870 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1871 for d in difference 

1872 ) 

1873 child_difference_indices.append(difference) 

1874 parent_difference_indices = list( 

1875 unpack_range_set( 

1876 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1877 ) 

1878 ) 

1879 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1880 assert all( 

1881 ( 

1882 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1883 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1884 ) 

1885 for d in parent_difference_indices 

1886 ) 

1887 # Now that we've finally got those regions, we'll insert the large ones 

1888 # as tract/patch dimension records. 

1889 skymap_name = "testing_v1" 

1890 registry.insertDimensionData( 

1891 "skymap", 

1892 { 

1893 "name": skymap_name, 

1894 "hash": bytes([42]), 

1895 "tract_max": 1, 

1896 "patch_nx_max": 2, 

1897 "patch_ny_max": 2, 

1898 }, 

1899 ) 

1900 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

1901 registry.insertDimensionData( 

1902 "patch", 

1903 *[ 

1904 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1905 for n, c in enumerate(child_regions_large) 

1906 ], 

1907 ) 

1908 # Add a dataset that uses these dimensions to make sure that modifying

1909 # them doesn't disrupt foreign keys (need to make sure DB doesn't 

1910 # implement insert with replace=True as delete-then-insert). 

1911 dataset_type = DatasetType( 

1912 "coadd", 

1913 dimensions=["tract", "patch"], 

1914 universe=registry.dimensions, 

1915 storageClass="Exposure", 

1916 ) 

1917 registry.registerDatasetType(dataset_type) 

1918 registry.registerCollection("the_run", CollectionType.RUN) 

1919 registry.insertDatasets( 

1920 dataset_type, 

1921 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

1922 run="the_run", 

1923 ) 

1924 # Query for tracts and patches that overlap some "difference"

1925 # commonSkyPix pixels; there should be overlaps, because the database

1926 # has the "large" suite of regions.

1927 self.assertEqual( 

1928 {0}, 

1929 { 

1930 data_id["tract"] 

1931 for data_id in registry.queryDataIds( 

1932 ["tract"], 

1933 skymap=skymap_name, 

1934 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1935 ) 

1936 }, 

1937 ) 

1938 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1939 self.assertIn( 

1940 patch_id, 

1941 { 

1942 data_id["patch"] 

1943 for data_id in registry.queryDataIds( 

1944 ["patch"], 

1945 skymap=skymap_name, 

1946 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1947 ) 

1948 }, 

1949 ) 

1950 # Use syncDimensionData to update the tract region and insertDimensionData

1951 # to update the patch regions, switching to the "small" suite.

1952 updated = registry.syncDimensionData( 

1953 "tract", 

1954 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

1955 update=True, 

1956 ) 

1957 self.assertEqual(updated, {"region": parent_region_large}) 

1958 registry.insertDimensionData( 

1959 "patch", 

1960 *[ 

1961 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1962 for n, c in enumerate(child_regions_small) 

1963 ], 

1964 replace=True, 

1965 ) 

1966 # Query again; there now should be no such overlaps, because the 

1967 # database has the "small" suite of regions. 

1968 self.assertFalse( 

1969 set( 

1970 registry.queryDataIds( 

1971 ["tract"], 

1972 skymap=skymap_name, 

1973 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1974 ) 

1975 ) 

1976 ) 

1977 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1978 self.assertNotIn( 

1979 patch_id, 

1980 { 

1981 data_id["patch"] 

1982 for data_id in registry.queryDataIds( 

1983 ["patch"], 

1984 skymap=skymap_name, 

1985 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1986 ) 

1987 }, 

1988 ) 

1989 # Update back to the large regions and query one more time. 

1990 updated = registry.syncDimensionData( 

1991 "tract", 

1992 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

1993 update=True, 

1994 ) 

1995 self.assertEqual(updated, {"region": parent_region_small}) 

1996 registry.insertDimensionData( 

1997 "patch", 

1998 *[ 

1999 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

2000 for n, c in enumerate(child_regions_large) 

2001 ], 

2002 replace=True, 

2003 ) 

2004 self.assertEqual( 

2005 {0}, 

2006 { 

2007 data_id["tract"] 

2008 for data_id in registry.queryDataIds( 

2009 ["tract"], 

2010 skymap=skymap_name, 

2011 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

2012 ) 

2013 }, 

2014 ) 

2015 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

2016 self.assertIn( 

2017 patch_id, 

2018 { 

2019 data_id["patch"] 

2020 for data_id in registry.queryDataIds( 

2021 ["patch"], 

2022 skymap=skymap_name, 

2023 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

2024 ) 

2025 }, 

2026 ) 

2027 
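    # A minimal sketch of the syncDimensionData(..., update=True) contract
    # asserted above: the return value maps each updated field name to its
    # *previous* value (here, the old region), so callers can tell what
    # changed.  Argument names are illustrative.
    def _sketchSyncUpdate(self, registry, skymap_name, new_region):
        changed = registry.syncDimensionData(
            "tract",
            {"skymap": skymap_name, "id": 0, "region": new_region},
            update=True,
        )
        return changed  # e.g. {"region": <old region>} when the region moved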

2028 def testCalibrationCollections(self): 

2029 """Test operations on `~CollectionType.CALIBRATION` collections, 

2030 including `Registry.certify`, `Registry.decertify`, and 

2031 `Registry.findDataset`. 

2032 """ 

2033 # Setup - make a Registry, fill it with some datasets in 

2034 # non-calibration collections. 

2035 registry = self.makeRegistry() 

2036 self.loadData(registry, "base.yaml") 

2037 self.loadData(registry, "datasets.yaml") 

2038 # Set up some timestamps. 

2039 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

2040 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

2041 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

2042 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

2043 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

2044 allTimespans = [ 

2045 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

2046 ] 

2047 # Get references to some datasets. 

2048 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

2049 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

2050 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

2051 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

2052 # Register the main calibration collection we'll be working with. 

2053 collection = "Cam1/calibs/default" 

2054 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

2055 # Cannot associate into a calibration collection (no timespan). 

2056 with self.assertRaises(CollectionTypeError): 

2057 registry.associate(collection, [bias2a]) 

2058 # Certify 2a dataset with [t2, t4) validity. 

2059 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

2060 # Test that we can query for this dataset via the new collection, both 

2061 # on its own and with a RUN collection, as long as we don't try to join 

2062 # in temporal dimensions or use findFirst=True. 

2063 self.assertEqual( 

2064 set(registry.queryDatasets("bias", findFirst=False, collections=collection)), 

2065 {bias2a}, 

2066 ) 

2067 self.assertEqual( 

2068 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])), 

2069 { 

2070 bias2a, 

2071 bias2b, 

2072 bias3b, 

2073 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2074 }, 

2075 ) 

2076 self.assertEqual( 

2077 set(registry.queryDataIds("detector", datasets="bias", collections=collection)), 

2078 {registry.expandDataId(instrument="Cam1", detector=2)}, 

2079 ) 

2080 self.assertEqual( 

2081 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])), 

2082 { 

2083 registry.expandDataId(instrument="Cam1", detector=2), 

2084 registry.expandDataId(instrument="Cam1", detector=3), 

2085 registry.expandDataId(instrument="Cam1", detector=4), 

2086 }, 

2087 ) 

2088 

2089 # We should not be able to certify 2b with anything overlapping that 

2090 # window. 

2091 with self.assertRaises(ConflictingDefinitionError): 

2092 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

2093 with self.assertRaises(ConflictingDefinitionError): 

2094 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

2095 with self.assertRaises(ConflictingDefinitionError): 

2096 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

2097 with self.assertRaises(ConflictingDefinitionError): 

2098 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

2099 with self.assertRaises(ConflictingDefinitionError): 

2100 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

2101 with self.assertRaises(ConflictingDefinitionError): 

2102 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

2103 with self.assertRaises(ConflictingDefinitionError): 

2104 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

2105 with self.assertRaises(ConflictingDefinitionError): 

2106 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

2107 # We should be able to certify 3a with a range overlapping that window, 

2108 # because it's for a different detector. 

2109 # We'll certify 3a over [t1, t3). 

2110 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

2111 # Now we'll certify 2b and 3b together over [t4, ∞). 

2112 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

2113 

2114 # Fetch all associations and check that they are what we expect. 

2115 self.assertCountEqual( 

2116 list( 

2117 registry.queryDatasetAssociations( 

2118 "bias", 

2119 collections=[collection, "imported_g", "imported_r"], 

2120 ) 

2121 ), 

2122 [ 

2123 DatasetAssociation( 

2124 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

2125 collection="imported_g", 

2126 timespan=None, 

2127 ), 

2128 DatasetAssociation( 

2129 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2130 collection="imported_r", 

2131 timespan=None, 

2132 ), 

2133 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

2134 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

2135 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

2136 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

2137 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

2138 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

2139 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2140 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2141 ], 

2142 ) 

2143 

2144 class Ambiguous: 

2145 """Tag class to denote lookups that should be ambiguous.""" 

2146 

2147 pass 

2148 

2149 def assertLookup( 

2150 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None 

2151 ) -> None: 

2152 """Local function that asserts that a bias lookup returns the given 

2153 expected result. 

2154 """ 

2155 if expected is Ambiguous: 

2156 with self.assertRaises((DatasetTypeError, LookupError)): 

2157 registry.findDataset( 

2158 "bias", 

2159 collections=collection, 

2160 instrument="Cam1", 

2161 detector=detector, 

2162 timespan=timespan, 

2163 ) 

2164 else: 

2165 self.assertEqual( 

2166 expected, 

2167 registry.findDataset( 

2168 "bias", 

2169 collections=collection, 

2170 instrument="Cam1", 

2171 detector=detector, 

2172 timespan=timespan, 

2173 ), 

2174 ) 

2175 

2176 # Systematically test lookups against expected results. 

2177 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2178 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2179 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2180 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2181 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

2182 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2183 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2184 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2185 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2186 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

2187 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2188 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2189 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2190 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

2191 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2192 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

2193 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

2194 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

2195 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

2196 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2197 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2198 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2199 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2200 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2201 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2202 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

2203 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2204 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2205 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2206 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2207 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

2208 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2209 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2210 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2211 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2212 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2213 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2214 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2215 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2216 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2217 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2218 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2219 

2220 # Decertify [t3, t5) for all data IDs, and do test lookups again. 

2221 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2222 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2223 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2224 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2225 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2226 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2227 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2228 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2229 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2230 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2231 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2232 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2233 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2234 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2235 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2236 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2237 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2238 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2239 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2240 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2241 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2242 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2243 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2244 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2245 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2246 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2247 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2248 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2249 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2250 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2251 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2252 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2253 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2254 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2255 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2256 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2257 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2258 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2259 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2260 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2261 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2262 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2263 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2264 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2265 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2266 

2267 # Decertify everything, this time with explicit data IDs, then check 

2268 # that no lookups succeed. 

2269 registry.decertify( 

2270 collection, 

2271 "bias", 

2272 Timespan(None, None), 

2273 dataIds=[ 

2274 dict(instrument="Cam1", detector=2), 

2275 dict(instrument="Cam1", detector=3), 

2276 ], 

2277 ) 

2278 for detector in (2, 3): 

2279 for timespan in allTimespans: 

2280 assertLookup(detector=detector, timespan=timespan, expected=None) 

2281 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2282 # those. 

2283 registry.certify( 

2284 collection, 

2285 [bias2a, bias3a], 

2286 Timespan(None, None), 

2287 ) 

2288 for timespan in allTimespans: 

2289 assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2290 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2291 # Decertify just bias2 over [t2, t4). 

2292 # This should split a single certification row into two (and leave the 

2293 # other existing row, for bias3a, alone). 

2294 registry.decertify( 

2295 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2296 ) 

2297 for timespan in allTimespans: 

2298 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2299 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2300 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2301 if overlapsBefore and overlapsAfter: 

2302 expected = Ambiguous 

2303 elif overlapsBefore or overlapsAfter: 

2304 expected = bias2a 

2305 else: 

2306 expected = None 

2307 assertLookup(detector=2, timespan=timespan, expected=expected) 

2308 
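    # A minimal sketch of the row-splitting reasoning above: decertifying a
    # window out of a certified half-open [begin, end) timespan leaves at
    # most two pieces.  Finite bounds are assumed for brevity; real
    # timespans also allow None, meaning unbounded.
    @staticmethod
    def _sketchDecertifySplit(certified, decertified):
        if not certified.overlaps(decertified):
            return [certified]
        pieces = []
        if certified.begin < decertified.begin:
            pieces.append(Timespan(certified.begin, decertified.begin))
        if decertified.end < certified.end:
            pieces.append(Timespan(decertified.end, certified.end))
        return pieces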

2309 def testSkipCalibs(self): 

2310 """Test how queries handle skipping of calibration collections.""" 

2311 registry = self.makeRegistry() 

2312 self.loadData(registry, "base.yaml") 

2313 self.loadData(registry, "datasets.yaml") 

2314 

2315 coll_calib = "Cam1/calibs/default" 

2316 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2317 

2318 # Add all biases to the calibration collection. 

2319 # Without this, the logic that prunes dataset subqueries based on 

2320 # datasetType-collection summary information will fire before the logic 

2321 # we want to test below. This is a good thing (it avoids the dreaded 

2322 # NotImplementedError a bit more often) everywhere but here. 

2323 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2324 

2325 coll_list = [coll_calib, "imported_g", "imported_r"] 

2326 chain = "Cam1/chain" 

2327 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2328 registry.setCollectionChain(chain, coll_list) 

2329 

2330 # explicit list will raise if findFirst=True or there are temporal 

2331 # dimensions 

2332 with self.assertRaises(NotImplementedError): 

2333 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2334 with self.assertRaises(NotImplementedError): 

2335 registry.queryDataIds( 

2336 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2337 ).count() 

2338 

2339 # chain will skip 

2340 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2341 self.assertGreater(len(datasets), 0) 

2342 

2343 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2344 self.assertGreater(len(dataIds), 0) 

2345 

2346 # glob will skip too 

2347 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2348 self.assertGreater(len(datasets), 0) 

2349 

2350 # regular expression will skip too 

2351 pattern = re.compile(".*") 

2352 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2353 self.assertGreater(len(datasets), 0) 

2354 

2355 # ellipsis should work as usual 

2356 datasets = list(registry.queryDatasets("bias", collections=...)) 

2357 self.assertGreater(len(datasets), 0) 

2358 

2359 # a few tests with findFirst

2360 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2361 self.assertGreater(len(datasets), 0) 

2362 

2363 def testIngestTimeQuery(self): 

2364 registry = self.makeRegistry() 

2365 self.loadData(registry, "base.yaml") 

2366 dt0 = datetime.utcnow() 

2367 self.loadData(registry, "datasets.yaml") 

2368 dt1 = datetime.utcnow() 

2369 

2370 datasets = list(registry.queryDatasets(..., collections=...)) 

2371 len0 = len(datasets) 

2372 self.assertGreater(len0, 0) 

2373 

2374 where = "ingest_date > T'2000-01-01'" 

2375 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2376 len1 = len(datasets) 

2377 self.assertEqual(len0, len1) 

2378 

2379 # no one will ever use this piece of software in 30 years 

2380 where = "ingest_date > T'2050-01-01'" 

2381 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2382 len2 = len(datasets) 

2383 self.assertEqual(len2, 0) 

2384 

2385 # Check more exact timing to make sure there is no 37 seconds offset 

2386 # (after fixing DM-30124). SQLite time precision is 1 second, make 

2387 # sure that we don't test with higher precision. 

2388 tests = [ 

2389 # format: (timestamp, operator, expected_len) 

2390 (dt0 - timedelta(seconds=1), ">", len0), 

2391 (dt0 - timedelta(seconds=1), "<", 0), 

2392 (dt1 + timedelta(seconds=1), "<", len0), 

2393 (dt1 + timedelta(seconds=1), ">", 0), 

2394 ] 

2395 for dt, op, expect_len in tests: 

2396 dt_str = dt.isoformat(sep=" ") 

2397 

2398 where = f"ingest_date {op} T'{dt_str}'" 

2399 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2400 self.assertEqual(len(datasets), expect_len) 

2401 

2402 # same with bind using datetime or astropy Time 

2403 where = f"ingest_date {op} ingest_time" 

2404 datasets = list( 

2405 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2406 ) 

2407 self.assertEqual(len(datasets), expect_len) 

2408 

2409 dt_astropy = astropy.time.Time(dt, format="datetime") 

2410 datasets = list( 

2411 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2412 ) 

2413 self.assertEqual(len(datasets), expect_len) 

2414 
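    # A minimal sketch of the two equivalent forms the test above uses to
    # constrain ingest_date: an inline T'...' time literal and a bound
    # value.  The timestamp is illustrative.
    def _sketchIngestDateQuery(self, registry):
        t = astropy.time.Time("2020-01-01 12:00:00", scale="utc")
        via_literal = list(
            registry.queryDatasets(
                ..., collections=..., where="ingest_date > T'2020-01-01 12:00:00'"
            )
        )
        via_bind = list(
            registry.queryDatasets(
                ..., collections=..., where="ingest_date > ts", bind={"ts": t}
            )
        )
        return via_literal, via_bind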

2415 def testTimespanQueries(self): 

2416 """Test query expressions involving timespans.""" 

2417 registry = self.makeRegistry() 

2418 self.loadData(registry, "hsc-rc2-subset.yaml") 

2419 # All visits in the database; mapping from ID to timespan.

2420 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2421 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2422 # visit IDs are monotonically increasing).

2423 ids = sorted(visits.keys()) 

2424 self.assertGreater(len(ids), 20) 

2425 # Pick some quasi-random indexes into `ids` to play with. 

2426 i1 = int(len(ids) * 0.1) 

2427 i2 = int(len(ids) * 0.3) 

2428 i3 = int(len(ids) * 0.6) 

2429 i4 = int(len(ids) * 0.8) 

2430 # Extract some times from those: just before the beginning of i1 (which 

2431 # should be after the end of the visit before), exactly the

2432 # beginning of i2, just after the beginning of i3 (and before its end), 

2433 # and the exact end of i4. 

2434 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2435 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2436 t2 = visits[ids[i2]].begin 

2437 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2438 self.assertLess(t3, visits[ids[i3]].end) 

2439 t4 = visits[ids[i4]].end 

2440 # Make sure those are actually in order. 

2441 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2442 

2443 bind = { 

2444 "t1": t1, 

2445 "t2": t2, 

2446 "t3": t3, 

2447 "t4": t4, 

2448 "ts23": Timespan(t2, t3), 

2449 } 

2450 

2451 def query(where): 

2452 """Return results as a sorted, deduplicated list of visit IDs.""" 

2453 return sorted( 

2454 { 

2455 dataId["visit"] 

2456 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2457 } 

2458 ) 

2459 

2460 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2461 # where they appear in the expression, and how we get the timespan into 

2462 # the expression. 

2463 

2464 # t1 is before the start of i1, so this should not include i1. 

2465 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2466 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2467 # should not include i2. 

2468 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2469 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2470 # t3 is in the middle of i3, so this should include i3. 

2471 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2472 # This one should not include i3 by the same reasoning. 

2473 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2474 # t4 is exactly at the end of i4, so this should include i4. 

2475 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2476 # i4's upper bound of t4 is exclusive, so this should not include i4. 

2477 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2478 

2479 # Now some timespan vs. time scalar queries. 

2480 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2481 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2482 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2483 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2484 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2485 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2486 

2487 # Empty timespans should not overlap anything. 

2488 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 

2489 
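    # A minimal standalone sketch (illustrative only; not called by any
    # test) of the half-open interval semantics the assertions above rely
    # on: begins are inclusive, ends are exclusive, so adjacent timespans
    # that share an endpoint do not overlap.
    @staticmethod
    def _example_timespan_half_open_semantics() -> bool:
        t0 = astropy.time.Time("2020-01-01T00:00:00", scale="tai")
        t1 = astropy.time.Time("2020-01-01T01:00:00", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", scale="tai")
        # Overlapping interiors: True.
        overlapping = Timespan(t0, t1).overlaps(Timespan(t0, t2))
        # Shared endpoint only: False, because ends are exclusive.
        adjacent = Timespan(t0, t1).overlaps(Timespan(t1, t2))
        return overlapping and not adjacent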

2490 def testCollectionSummaries(self): 

2491 """Test recording and retrieval of collection summaries.""" 

2492 self.maxDiff = None 

2493 registry = self.makeRegistry() 

2494 # Importing datasets from yaml should go through the code path where 

2495 # we update collection summaries as we insert datasets. 

2496 self.loadData(registry, "base.yaml") 

2497 self.loadData(registry, "datasets.yaml") 

2498 flat = registry.getDatasetType("flat") 

2499 expected1 = CollectionSummary() 

2500 expected1.dataset_types.add(registry.getDatasetType("bias")) 

2501 expected1.add_data_ids( 

2502 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)] 

2503 ) 

2504 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2505 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2506 # Create a chained collection with both of the imported runs; the 

2507 # summary should be the same, because it's a union with itself. 

2508 chain = "chain" 

2509 registry.registerCollection(chain, CollectionType.CHAINED) 

2510 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2511 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2512 # Associate flats only into a tagged collection and a calibration 

2513 # collection to check summaries of those. 

2514 tag = "tag" 

2515 registry.registerCollection(tag, CollectionType.TAGGED) 

2516 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2517 calibs = "calibs" 

2518 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2519 registry.certify( 

2520 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2521 ) 

2522 expected2 = expected1.copy() 

2523 expected2.dataset_types.discard("bias") 

2524 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2525 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2526 # Explicitly calling Registry.refresh() should load those same 

2527 # summaries, via a totally different code path. 

2528 registry.refresh() 

2529 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2530 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2531 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2532 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2533 

2534 def testBindInQueryDatasets(self): 

2535 """Test that the bind parameter is correctly forwarded in 

2536 queryDatasets recursion. 

2537 """ 

2538 registry = self.makeRegistry() 

2539 # Load the standard test data; the query below then checks that a bind 

2540 # placeholder and the equivalent literal value select the same datasets. 

2541 self.loadData(registry, "base.yaml") 

2542 self.loadData(registry, "datasets.yaml") 

2543 self.assertEqual( 

2544 set(registry.queryDatasets("flat", band="r", collections=...)), 

2545 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2546 ) 

2547 

2548 def testQueryIntRangeExpressions(self): 

2549 """Test integer range expressions in ``where`` arguments. 

2550 

2551 Note that our expressions use inclusive stop values, unlike Python's. 

2552 """ 

2553 registry = self.makeRegistry() 

2554 self.loadData(registry, "base.yaml") 

2555 self.assertEqual( 

2556 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")), 

2557 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]}, 

2558 ) 

2559 self.assertEqual( 

2560 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")), 

2561 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]}, 

2562 ) 

2563 self.assertEqual( 

2564 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")), 

2565 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]}, 

2566 ) 

2567 
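    # Hypothetical helper (not used by the tests) spelling out the
    # inclusive-stop semantics of the START..STOP:STRIDE range literal
    # described in the docstring above; it corresponds to Python's
    # range(start, stop + 1, stride).
    @staticmethod
    def _expand_range_literal(start: int, stop: int, stride: int = 1) -> list[int]:
        return list(range(start, stop + 1, stride))

    # For example, _expand_range_literal(1, 4, 2) == [1, 3], matching the
    # data IDs expected for "detector IN (1..4:2)" above.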

2568 def testQueryResultSummaries(self): 

2569 """Test summary methods like `count`, `any`, and `explain_no_results` 

2570 on `DataCoordinateQueryResults` and `DatasetQueryResults`. 

2571 """ 

2572 registry = self.makeRegistry() 

2573 self.loadData(registry, "base.yaml") 

2574 self.loadData(registry, "datasets.yaml") 

2575 self.loadData(registry, "spatial.yaml") 

2576 # Default test dataset has two collections, each with both flats and 

2577 # biases. Add a new collection with only biases. 

2578 registry.registerCollection("biases", CollectionType.TAGGED) 

2579 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2580 # First query yields two results, and involves no postprocessing. 

2581 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2582 self.assertTrue(query1.any(execute=False, exact=False)) 

2583 self.assertTrue(query1.any(execute=True, exact=False)) 

2584 self.assertTrue(query1.any(execute=True, exact=True)) 

2585 self.assertEqual(query1.count(exact=False), 2) 

2586 self.assertEqual(query1.count(exact=True), 2) 

2587 self.assertFalse(list(query1.explain_no_results())) 

2588 # Second query should yield no results, which we should see when 

2589 # we attempt to expand the data ID. 

2590 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2591 # There's no execute=False, exact=False test here because the behavior 

2592 # is not something we want to guarantee in this case (and exact=False 

2593 # says either answer is legal). 

2594 self.assertFalse(query2.any(execute=True, exact=False)) 

2595 self.assertFalse(query2.any(execute=True, exact=True)) 

2596 self.assertEqual(query2.count(exact=False), 0) 

2597 self.assertEqual(query2.count(exact=True), 0) 

2598 self.assertTrue(list(query2.explain_no_results())) 

2599 # These queries yield no results due to various problems that can be 

2600 # spotted prior to execution, yielding helpful diagnostics. 

2601 base_query = registry.queryDataIds(["detector", "physical_filter"]) 

2602 queries_and_snippets = [ 

2603 ( 

2604 # Dataset type name doesn't match any existing dataset types. 

2605 registry.queryDatasets("nonexistent", collections=...), 

2606 ["nonexistent"], 

2607 ), 

2608 ( 

2609 # Dataset type object isn't registered. 

2610 registry.queryDatasets( 

2611 DatasetType( 

2612 "nonexistent", 

2613 dimensions=["instrument"], 

2614 universe=registry.dimensions, 

2615 storageClass="Image", 

2616 ), 

2617 collections=..., 

2618 ), 

2619 ["nonexistent"], 

2620 ), 

2621 ( 

2622 # No datasets of this type in this collection. 

2623 registry.queryDatasets("flat", collections=["biases"]), 

2624 ["flat", "biases"], 

2625 ), 

2626 ( 

2627 # No datasets of this type in this collection. 

2628 base_query.findDatasets("flat", collections=["biases"]), 

2629 ["flat", "biases"], 

2630 ), 

2631 ( 

2632 # No collections matching at all. 

2633 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2634 ["potato"], 

2635 ), 

2636 ] 

2637 # The behavior of these additional queries is slated to change in the 

2638 # future, so we also check for deprecation warnings. 

2639 with self.assertWarns(FutureWarning): 

2640 queries_and_snippets.append( 

2641 ( 

2642 # Dataset type name doesn't match any existing dataset 

2643 # types. 

2644 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...), 

2645 ["nonexistent"], 

2646 ) 

2647 ) 

2648 with self.assertWarns(FutureWarning): 

2649 queries_and_snippets.append( 

2650 ( 

2651 # Dataset type name doesn't match any existing dataset 

2652 # types. 

2653 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...), 

2654 ["nonexistent"], 

2655 ) 

2656 ) 

2657 for query, snippets in queries_and_snippets: 

2658 self.assertFalse(query.any(execute=False, exact=False)) 

2659 self.assertFalse(query.any(execute=True, exact=False)) 

2660 self.assertFalse(query.any(execute=True, exact=True)) 

2661 self.assertEqual(query.count(exact=False), 0) 

2662 self.assertEqual(query.count(exact=True), 0) 

2663 messages = list(query.explain_no_results()) 

2664 self.assertTrue(messages) 

2665 # Want all expected snippets to appear in at least one message. 

2666 self.assertTrue( 

2667 any( 

2668 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2669 ), 

2670 messages, 

2671 ) 

2672 

2673 # This query does yield results, but should also emit a warning because 

2674 # passing dataset type patterns to queryDataIds is deprecated; just 

2675 # check for the warning. 

2676 with self.assertWarns(FutureWarning): 

2677 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...) 

2678 

2679 # These queries yield no results due to problems that can be identified 

2680 # by cheap follow-up queries, yielding helpful diagnostics. 

2681 for query, snippets in [ 

2682 ( 

2683 # No records for one of the involved dimensions. 

2684 registry.queryDataIds(["subfilter"]), 

2685 ["no rows", "subfilter"], 

2686 ), 

2687 ( 

2688 # No records for one of the involved dimensions. 

2689 registry.queryDimensionRecords("subfilter"), 

2690 ["no rows", "subfilter"], 

2691 ), 

2692 ]: 

2693 self.assertFalse(query.any(execute=True, exact=False)) 

2694 self.assertFalse(query.any(execute=True, exact=True)) 

2695 self.assertEqual(query.count(exact=True), 0) 

2696 messages = list(query.explain_no_results()) 

2697 self.assertTrue(messages) 

2698 # Want all expected snippets to appear in at least one message. 

2699 self.assertTrue( 

2700 any( 

2701 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2702 ), 

2703 messages, 

2704 ) 

2705 

2706 # This query yields four overlaps in the database, but one is filtered 

2707 # out in postprocessing. The count queries aren't accurate because 

2708 # they don't account for duplication that happens due to an internal 

2709 # join against commonSkyPix. 

2710 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2711 self.assertEqual( 

2712 { 

2713 DataCoordinate.standardize( 

2714 instrument="Cam1", 

2715 skymap="SkyMap1", 

2716 visit=v, 

2717 tract=t, 

2718 universe=registry.dimensions, 

2719 ) 

2720 for v, t in [(1, 0), (2, 0), (2, 1)] 

2721 }, 

2722 set(query3), 

2723 ) 

2724 self.assertTrue(query3.any(execute=False, exact=False)) 

2725 self.assertTrue(query3.any(execute=True, exact=False)) 

2726 self.assertTrue(query3.any(execute=True, exact=True)) 

2727 self.assertGreaterEqual(query3.count(exact=False), 4) 

2728 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3) 

2729 self.assertFalse(list(query3.explain_no_results())) 

2730 # This query yields overlaps in the database, but all are filtered 

2731 # out in postprocessing. The count queries again aren't very useful. 

2732 # We have to use `where=` here to avoid an optimization that 

2733 # (currently) skips the spatial postprocess-filtering because it 

2734 # recognizes that no spatial join is necessary. That's not ideal, but 

2735 # fixing it is out of scope for this ticket. 

2736 query4 = registry.queryDataIds( 

2737 ["visit", "tract"], 

2738 instrument="Cam1", 

2739 skymap="SkyMap1", 

2740 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2741 ) 

2742 self.assertFalse(set(query4)) 

2743 self.assertTrue(query4.any(execute=False, exact=False)) 

2744 self.assertTrue(query4.any(execute=True, exact=False)) 

2745 self.assertFalse(query4.any(execute=True, exact=True)) 

2746 self.assertGreaterEqual(query4.count(exact=False), 1) 

2747 self.assertEqual(query4.count(exact=True, discard=True), 0) 

2748 messages = query4.explain_no_results() 

2749 self.assertTrue(messages) 

2750 self.assertTrue(any("overlap" in message for message in messages)) 

2751 # This query should yield results from one dataset type but not the 

2752 # other, which is not registered. 

2753 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"]) 

2754 self.assertTrue(set(query5)) 

2755 self.assertTrue(query5.any(execute=False, exact=False)) 

2756 self.assertTrue(query5.any(execute=True, exact=False)) 

2757 self.assertTrue(query5.any(execute=True, exact=True)) 

2758 self.assertGreaterEqual(query5.count(exact=False), 1) 

2759 self.assertGreaterEqual(query5.count(exact=True), 1) 

2760 self.assertFalse(list(query5.explain_no_results())) 

2761 # This query applies a selection that yields no results, fully in the 

2762 # database. Explaining why it fails involves traversing the relation 

2763 # tree and running a LIMIT 1 query at each level that has the potential 

2764 # to remove rows. 

2765 query6 = registry.queryDimensionRecords( 

2766 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1" 

2767 ) 

2768 self.assertEqual(query6.count(exact=True), 0) 

2769 messages = query6.explain_no_results() 

2770 self.assertTrue(messages) 

2771 self.assertTrue(any("no-purpose" in message for message in messages)) 

2772 

2773 def testQueryDataIdsExpressionError(self): 

2774 """Test error checking of 'where' expressions in queryDataIds.""" 

2775 registry = self.makeRegistry() 

2776 self.loadData(registry, "base.yaml") 

2777 bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")} 

2778 with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."): 

2779 registry.queryDataIds(["detector"], where="foo.bar = 12") 

2780 with self.assertRaisesRegex( 

2781 LookupError, "Dimension element name cannot be inferred in this context." 

2782 ): 

2783 registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind) 
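        # A sketch of the qualified form that avoids the second error above
        # (assumption: querying a temporal element such as 'visit', so the
        # qualifier is meaningful):
        #
        #     registry.queryDataIds(["visit"], where="visit.timespan.end < time", bind=bind)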

2784 

2785 def testQueryDataIdsOrderBy(self): 

2786 """Test order_by and limit on result returned by queryDataIds().""" 

2787 registry = self.makeRegistry() 

2788 self.loadData(registry, "base.yaml") 

2789 self.loadData(registry, "datasets.yaml") 

2790 self.loadData(registry, "spatial.yaml") 

2791 

2792 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

2793 return registry.queryDataIds( 

2794 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

2795 ) 

2796 

2797 Test = namedtuple( 

2798 "testQueryDataIdsOrderByTest", 

2799 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

2800 defaults=(None, None, None), 

2801 ) 

2802 

2803 test_data = ( 

2804 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2805 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

2806 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

2807 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

2808 Test( 

2809 "tract.id,visit.id", 

2810 "tract,visit", 

2811 ((0, 1), (0, 1), (0, 2)), 

2812 limit=(3,), 

2813 ), 

2814 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

2815 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

2816 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

2817 Test( 

2818 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

2819 ), 

2820 Test( 

2821 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

2822 ), 

2823 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2824 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2825 Test( 

2826 "tract,-timespan.begin,timespan.end", 

2827 "tract,visit", 

2828 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

2829 ), 

2830 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

2831 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

2832 Test( 

2833 "tract,detector", 

2834 "tract,detector", 

2835 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2836 datasets="flat", 

2837 collections="imported_r", 

2838 ), 

2839 Test( 

2840 "tract,detector.full_name", 

2841 "tract,detector", 

2842 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2843 datasets="flat", 

2844 collections="imported_r", 

2845 ), 

2846 Test( 

2847 "tract,detector.raft,detector.name_in_raft", 

2848 "tract,detector", 

2849 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2850 datasets="flat", 

2851 collections="imported_r", 

2852 ), 

2853 ) 

2854 

2855 for test in test_data: 

2856 order_by = test.order_by.split(",") 

2857 keys = test.keys.split(",") 

2858 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

2859 if test.limit is not None: 

2860 query = query.limit(*test.limit) 

2861 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

2862 self.assertEqual(dataIds, test.result) 

2863 

2864 # and materialize 

2865 query = do_query(keys).order_by(*order_by) 

2866 if test.limit is not None: 

2867 query = query.limit(*test.limit) 

2868 with self.assertRaises(RelationalAlgebraError): 

2869 with query.materialize(): 

2870 pass 

2871 

2872 # errors in a name 

2873 for order_by in ("", "-"): 

2874 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2875 list(do_query().order_by(order_by)) 

2876 

2877 for order_by in ("undimension.name", "-undimension.name"): 

2878 with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"): 

2879 list(do_query().order_by(order_by)) 

2880 

2881 for order_by in ("attract", "-attract"): 

2882 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

2883 list(do_query().order_by(order_by)) 

2884 

2885 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

2886 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

2887 

2888 with self.assertRaisesRegex( 

2889 ValueError, 

2890 r"Timespan exists in more than one dimension element \(exposure, visit\); " 

2891 r"qualify timespan with specific dimension name\.", 

2892 ): 

2893 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

2894 

2895 with self.assertRaisesRegex( 

2896 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

2897 ): 

2898 list(do_query("tract").order_by("timespan.begin")) 

2899 

2900 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

2901 list(do_query("tract").order_by("tract.timespan.begin")) 

2902 

2903 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

2904 list(do_query("tract").order_by("tract.name")) 

2905 

2906 with self.assertRaisesRegex( 

2907 ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?" 

2908 ): 

2909 list(do_query("visit").order_by("timestamp.begin")) 

2910 

2911 def testQueryDataIdsGovernorExceptions(self): 

2912 """Test exceptions raised by queryDataIds() for incorrect governors.""" 

2913 registry = self.makeRegistry() 

2914 self.loadData(registry, "base.yaml") 

2915 self.loadData(registry, "datasets.yaml") 

2916 self.loadData(registry, "spatial.yaml") 

2917 

2918 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs): 

2919 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs) 

2920 

2921 Test = namedtuple( 

2922 "testQueryDataIdExceptionsTest", 

2923 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"), 

2924 defaults=(None, None, None, {}, None, 0), 

2925 ) 

2926 

2927 test_data = ( 

2928 Test("tract,visit", count=6), 

2929 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2930 Test( 

2931 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError 

2932 ), 

2933 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2934 Test( 

2935 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError 

2936 ), 

2937 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6), 

2938 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError), 

2939 Test( 

2940 "tract,visit", 

2941 where="instrument=cam AND skymap=map", 

2942 bind={"cam": "Cam1", "map": "SkyMap1"}, 

2943 count=6, 

2944 ), 

2945 Test( 

2946 "tract,visit", 

2947 where="instrument=cam AND skymap=map", 

2948 bind={"cam": "Cam", "map": "SkyMap"}, 

2949 exception=DataIdValueError, 

2950 ), 

2951 ) 

2952 

2953 for test in test_data: 

2954 dimensions = test.dimensions.split(",") 

2955 if test.exception: 

2956 with self.assertRaises(test.exception): 

2957 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() 

2958 else: 

2959 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2960 self.assertEqual(query.count(discard=True), test.count) 

2961 

2962 # and materialize 

2963 if test.exception: 

2964 with self.assertRaises(test.exception): 

2965 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2966 with query.materialize() as materialized: 

2967 materialized.count(discard=True) 

2968 else: 

2969 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2970 with query.materialize() as materialized: 

2971 self.assertEqual(materialized.count(discard=True), test.count) 

2972 

2973 def testQueryDimensionRecordsOrderBy(self): 

2974 """Test order_by and limit on result returned by 

2975 queryDimensionRecords(). 

2976 """ 

2977 registry = self.makeRegistry() 

2978 self.loadData(registry, "base.yaml") 

2979 self.loadData(registry, "datasets.yaml") 

2980 self.loadData(registry, "spatial.yaml") 

2981 

2982 def do_query(element, datasets=None, collections=None): 

2983 return registry.queryDimensionRecords( 

2984 element, instrument="Cam1", datasets=datasets, collections=collections 

2985 ) 

2986 

2987 query = do_query("detector") 

2988 self.assertEqual(len(list(query)), 4) 

2989 

2990 Test = namedtuple( 

2991 "testQueryDataIdsOrderByTest", 

2992 ("element", "order_by", "result", "limit", "datasets", "collections"), 

2993 defaults=(None, None, None), 

2994 ) 

2995 

2996 test_data = ( 

2997 Test("detector", "detector", (1, 2, 3, 4)), 

2998 Test("detector", "-detector", (4, 3, 2, 1)), 

2999 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)), 

3000 Test("detector", "-detector.purpose", (4,), limit=(1,)), 

3001 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)), 

3002 Test("visit", "visit", (1, 2)), 

3003 Test("visit", "-visit.id", (2, 1)), 

3004 Test("visit", "zenith_angle", (1, 2)), 

3005 Test("visit", "-visit.name", (2, 1)), 

3006 Test("visit", "day_obs,-timespan.begin", (2, 1)), 

3007 ) 

3008 

3009 for test in test_data: 

3010 order_by = test.order_by.split(",") 

3011 query = do_query(test.element).order_by(*order_by) 

3012 if test.limit is not None: 

3013 query = query.limit(*test.limit) 

3014 dataIds = tuple(rec.id for rec in query) 

3015 self.assertEqual(dataIds, test.result) 

3016 

3017 # errors in a name 

3018 for order_by in ("", "-"): 

3019 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

3020 list(do_query("detector").order_by(order_by)) 

3021 

3022 for order_by in ("undimension.name", "-undimension.name"): 

3023 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"): 

3024 list(do_query("detector").order_by(order_by)) 

3025 

3026 for order_by in ("attract", "-attract"): 

3027 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."): 

3028 list(do_query("detector").order_by(order_by)) 

3029 

3030 for order_by in ("timestamp.begin", "-timestamp.begin"): 

3031 with self.assertRaisesRegex( 

3032 ValueError, 

3033 r"Element name mismatch: 'timestamp' instead of 'visit'; " 

3034 r"perhaps you meant 'timespan.begin'\?", 

3035 ): 

3036 list(do_query("visit").order_by(order_by)) 

3037 

3038 def testQueryDimensionRecordsExceptions(self): 

3039 """Test exceptions raised by queryDimensionRecords().""" 

3040 registry = self.makeRegistry() 

3041 self.loadData(registry, "base.yaml") 

3042 self.loadData(registry, "datasets.yaml") 

3043 self.loadData(registry, "spatial.yaml") 

3044 

3045 result = registry.queryDimensionRecords("detector") 

3046 self.assertEqual(result.count(), 4) 

3047 result = registry.queryDimensionRecords("detector", instrument="Cam1") 

3048 self.assertEqual(result.count(), 4) 

3049 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"}) 

3050 self.assertEqual(result.count(), 4) 

3051 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'") 

3052 self.assertEqual(result.count(), 4) 

3053 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"}) 

3054 self.assertEqual(result.count(), 4) 

3055 

3056 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3057 result = registry.queryDimensionRecords("detector", instrument="NotCam1") 

3058 result.count() 

3059 

3060 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3061 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) 

3062 result.count() 

3063 

3064 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3065 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") 

3066 result.count() 

3067 

3068 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3069 result = registry.queryDimensionRecords( 

3070 "detector", where="instrument=instr", bind={"instr": "NotCam1"} 

3071 ) 

3072 result.count() 

3073 

3074 def testDatasetConstrainedDimensionRecordQueries(self): 

3075 """Test that queryDimensionRecords works even when given a dataset 

3076 constraint whose dimensions extend beyond the requested dimension 

3077 element's. 

3078 """ 

3079 registry = self.makeRegistry() 

3080 self.loadData(registry, "base.yaml") 

3081 self.loadData(registry, "datasets.yaml") 

3082 # Query for physical_filter dimension records, using a dataset that 

3083 # has both physical_filter and detector dimensions. 

3084 records = registry.queryDimensionRecords( 

3085 "physical_filter", 

3086 datasets=["flat"], 

3087 collections="imported_r", 

3088 ) 

3089 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"}) 

3090 # Trying to constrain by all dataset types is an error. 

3091 with self.assertRaises(TypeError): 

3092 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r")) 

3093 

3094 def testSkyPixDatasetQueries(self): 

3095 """Test that we can build queries involving skypix dimensions as long 

3096 as a dataset type that uses those dimensions is included. 

3097 """ 

3098 registry = self.makeRegistry() 

3099 self.loadData(registry, "base.yaml") 

3100 dataset_type = DatasetType( 

3101 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int" 

3102 ) 

3103 registry.registerDatasetType(dataset_type) 

3104 run = "r" 

3105 registry.registerRun(run) 

3106 # First try queries where there are no datasets; the concern is whether 

3107 # we can even build and execute these queries without raising, even 

3108 # when "doomed" query shortcuts are in play. 

3109 self.assertFalse( 

3110 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)) 

3111 ) 

3112 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run))) 

3113 # Now add a dataset and see that we can get it back. 

3114 htm7 = registry.dimensions.skypix["htm"][7].pixelization 
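        # pixelization.universe() returns an lsst.sphgeom.RangeSet of valid
        # pixel IDs; [0][0] takes the begin of its first (begin, end) range,
        # i.e. the lowest valid htm7 ID.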

3115 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0]) 

3116 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run) 

3117 self.assertEqual( 

3118 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)), 

3119 {data_id}, 

3120 ) 

3121 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref}) 

3122 

3123 def testDatasetIdFactory(self): 

3124 """Simple test for DatasetIdFactory, mostly to catch potential changes 

3125 in its API. 

3126 """ 

3127 registry = self.makeRegistry() 

3128 factory = DatasetIdFactory() 

3129 dataset_type = DatasetType( 

3130 "datasetType", 

3131 dimensions=["detector", "instrument"], 

3132 universe=registry.dimensions, 

3133 storageClass="int", 

3134 ) 

3135 run = "run" 

3136 data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions) 

3137 

3138 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE) 

3139 self.assertIsInstance(datasetId, uuid.UUID) 

3140 self.assertEqual(datasetId.version, 4) 

3141 

3142 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE) 

3143 self.assertIsInstance(datasetId, uuid.UUID) 

3144 self.assertEqual(datasetId.version, 5) 

3145 

3146 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN) 

3147 self.assertIsInstance(datasetId, uuid.UUID) 

3148 self.assertEqual(datasetId.version, 5) 
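        # The version-5 modes are deterministic, so repeating a call with
        # identical inputs should reproduce the same UUID (a property of
        # uuid5 itself, not separately asserted here):
        #
        #     assert datasetId == factory.makeDatasetId(
        #         run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN
        #     )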

3149 

3150 def testExposureQueries(self): 

3151 """Test query methods using arguments sourced from the exposure log 

3152 service. 

3153 

3154 The most complete test dataset currently available to daf_butler tests 

3155 is the hsc-rc2-subset.yaml export (which is unfortunately distinct from 

3156 the lsst/rc2_subset GitHub repo), but that does not have 'exposure' 

3157 dimension records as it was focused on providing nontrivial spatial 

3158 overlaps between visit+detector and tract+patch. So in this test we 

3159 need to translate queries that originally used the exposure dimension 

3160 to use the (very similar) visit dimension instead. 

3161 """ 

3162 registry = self.makeRegistry() 

3163 self.loadData(registry, "hsc-rc2-subset.yaml") 

3164 self.assertEqual( 

3165 [ 

3166 record.id 

3167 for record in registry.queryDimensionRecords("visit", instrument="HSC") 

3168 .order_by("id") 

3169 .limit(5) 

3170 ], 

3171 [318, 322, 326, 330, 332], 

3172 ) 

3173 self.assertEqual( 

3174 [ 

3175 data_id["visit"] 

3176 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("id").limit(5) 

3177 ], 

3178 [318, 322, 326, 330, 332], 

3179 ) 

3180 self.assertEqual( 

3181 [ 

3182 record.id 

3183 for record in registry.queryDimensionRecords("detector", instrument="HSC") 

3184 .order_by("full_name") 

3185 .limit(5) 

3186 ], 

3187 [73, 72, 71, 70, 65], 

3188 ) 

3189 self.assertEqual( 

3190 [ 

3191 data_id["detector"] 

3192 for data_id in registry.queryDataIds(["detector"], instrument="HSC") 

3193 .order_by("full_name") 

3194 .limit(5) 

3195 ], 

3196 [73, 72, 71, 70, 65], 

3197 ) 

3198 

3199 def test_long_query_names(self) -> None: 

3200 """Test that queries involving very long names are handled correctly. 

3201 

3202 This is especially important for PostgreSQL, which truncates identifiers 

3203 longer than 63 characters, but it's worth testing for all DBs. 

3204 """ 

3205 registry = self.makeRegistry() 

3206 name = "abcd" * 17 
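        # len("abcd" * 17) == 68, comfortably past PostgreSQL's 63-character
        # identifier limit.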

3207 registry.registerDatasetType( 

3208 DatasetType( 

3209 name, 

3210 dimensions=(), 

3211 storageClass="Exposure", 

3212 universe=registry.dimensions, 

3213 ) 

3214 ) 

3215 # We need to search more than one collection that actually contains a 

3216 # matching dataset; otherwise an optimization makes findFirst=True a 

3217 # no-op, sidestepping the truncation bugs we want to exercise. 

3218 run1 = "run1" 

3219 registry.registerRun(run1) 

3220 run2 = "run2" 

3221 registry.registerRun(run2) 

3222 (ref1,) = registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run1) 

3223 registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run2) 

3224 self.assertEqual( 

3225 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)), 

3226 {ref1}, 

3227 ) 

3228 

3229 def test_skypix_constraint_queries(self) -> None: 

3230 """Test queries spatially constrained by a skypix data ID.""" 

3231 registry = self.makeRegistry() 

3232 self.loadData(registry, "hsc-rc2-subset.yaml") 

3233 patch_regions = { 

3234 (data_id["tract"], data_id["patch"]): data_id.region 

3235 for data_id in registry.queryDataIds(["patch"]).expanded() 

3236 } 

3237 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"] 

3238 # This check ensures the test doesn't become trivial due to a config 

3239 # change; if it does, just pick a different HTM level. 

3240 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix) 

3241 # Gather all skypix IDs that definitely overlap at least one of these 

3242 # patches. 

3243 relevant_skypix_ids = lsst.sphgeom.RangeSet() 

3244 for patch_region in patch_regions.values(): 

3245 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region) 

3246 # Look for a "nontrivial" skypix_id that overlaps at least one patch 

3247 # and does not overlap at least one other patch. 

3248 for skypix_id in itertools.chain.from_iterable( 

3249 range(begin, end) for begin, end in relevant_skypix_ids 

3250 ): 

3251 skypix_region = skypix_dimension.pixelization.pixel(skypix_id) 

3252 overlapping_patches = { 

3253 patch_key 

3254 for patch_key, patch_region in patch_regions.items() 

3255 if not patch_region.isDisjointFrom(skypix_region) 

3256 } 

3257 if overlapping_patches and overlapping_patches != patch_regions.keys(): 

3258 break 
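            # (The 'else' below belongs to the 'for': it runs only if the
            # loop completes without 'break', i.e. no usable skypix ID was
            # found.)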

3259 else: 

3260 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.") 

3261 self.assertEqual( 

3262 { 

3263 (data_id["tract"], data_id["patch"]) 

3264 for data_id in registry.queryDataIds( 

3265 ["patch"], 

3266 dataId={skypix_dimension.name: skypix_id}, 

3267 ) 

3268 }, 

3269 overlapping_patches, 

3270 ) 

3271 # Test that a three-way join that includes the common skypix system in 

3272 # the dimensions doesn't generate redundant join terms in the query. 

3273 full_data_ids = set( 

3274 registry.queryDataIds( 

3275 ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC" 

3276 ).expanded() 

3277 ) 

3278 self.assertGreater(len(full_data_ids), 0) 

3279 for data_id in full_data_ids: 

3280 self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region)) 

3281 self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region)) 

3282 

3283 def test_spatial_constraint_queries(self) -> None: 

3284 """Test queries in which one spatial dimension in the constraint (data 

3285 ID or ``where`` string) constrains a different spatial dimension in the 

3286 query result columns. 

3287 """ 

3288 registry = self.makeRegistry() 

3289 self.loadData(registry, "hsc-rc2-subset.yaml") 

3290 patch_regions = { 

3291 (data_id["tract"], data_id["patch"]): data_id.region 

3292 for data_id in registry.queryDataIds(["patch"]).expanded() 

3293 } 

3294 observation_regions = { 

3295 (data_id["visit"], data_id["detector"]): data_id.region 

3296 for data_id in registry.queryDataIds(["visit", "detector"]).expanded() 

3297 } 

3298 all_combos = { 

3299 (patch_key, observation_key) 

3300 for patch_key, observation_key in itertools.product(patch_regions, observation_regions) 

3301 } 

3302 overlapping_combos = { 

3303 (patch_key, observation_key) 

3304 for patch_key, observation_key in all_combos 

3305 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key]) 

3306 } 

3307 # Check a direct spatial join with no constraint first. 

3308 self.assertEqual( 

3309 { 

3310 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"])) 

3311 for data_id in registry.queryDataIds(["patch", "visit", "detector"]) 

3312 }, 

3313 overlapping_combos, 

3314 ) 

3315 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set) 

3316 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set) 

3317 for patch_key, observation_key in overlapping_combos: 

3318 overlaps_by_patch[patch_key].add(observation_key) 

3319 overlaps_by_observation[observation_key].add(patch_key) 

3320 # Find patches and observations that each overlap at least one of the 

3321 # other kind, but not all of them. 

3322 nontrivial_patch = next( 

3323 iter( 

3324 patch_key 

3325 for patch_key, observation_keys in overlaps_by_patch.items() 

3326 if observation_keys and observation_keys != observation_regions.keys() 

3327 ) 

3328 ) 

3329 nontrivial_observation = next( 

3330 iter( 

3331 observation_key 

3332 for observation_key, patch_keys in overlaps_by_observation.items() 

3333 if patch_keys and patch_keys != patch_regions.keys() 

3334 ) 

3335 ) 

3336 # Use the nontrivial patches and observations as constraints on the 

3337 # other dimensions in various ways, first via a 'where' expression. 

3338 # It's better in general to use 'bind' instead of f-strings, but these 

3339 # are all integers so there are no quoting concerns. 

3340 self.assertEqual( 

3341 { 

3342 (data_id["visit"], data_id["detector"]) 

3343 for data_id in registry.queryDataIds( 

3344 ["visit", "detector"], 

3345 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}", 

3346 skymap="hsc_rings_v1", 

3347 ) 

3348 }, 

3349 overlaps_by_patch[nontrivial_patch], 

3350 ) 
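        # The bind-based equivalent of the f-string expression above (a
        # sketch; same semantics, with no string-formatting concerns):
        #
        #     registry.queryDataIds(
        #         ["visit", "detector"],
        #         where="tract = my_tract AND patch = my_patch",
        #         bind={"my_tract": nontrivial_patch[0], "my_patch": nontrivial_patch[1]},
        #         skymap="hsc_rings_v1",
        #     )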

3351 self.assertEqual( 

3352 { 

3353 (data_id["tract"], data_id["patch"]) 

3354 for data_id in registry.queryDataIds( 

3355 ["patch"], 

3356 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}", 

3357 instrument="HSC", 

3358 ) 

3359 }, 

3360 overlaps_by_observation[nontrivial_observation], 

3361 ) 

3362 # and then via the dataId argument. 

3363 self.assertEqual( 

3364 { 

3365 (data_id["visit"], data_id["detector"]) 

3366 for data_id in registry.queryDataIds( 

3367 ["visit", "detector"], 

3368 dataId={ 

3369 "tract": nontrivial_patch[0], 

3370 "patch": nontrivial_patch[1], 

3371 }, 

3372 skymap="hsc_rings_v1", 

3373 ) 

3374 }, 

3375 overlaps_by_patch[nontrivial_patch], 

3376 ) 

3377 self.assertEqual( 

3378 { 

3379 (data_id["tract"], data_id["patch"]) 

3380 for data_id in registry.queryDataIds( 

3381 ["patch"], 

3382 dataId={ 

3383 "visit": nontrivial_observation[0], 

3384 "detector": nontrivial_observation[1], 

3385 }, 

3386 instrument="HSC", 

3387 ) 

3388 }, 

3389 overlaps_by_observation[nontrivial_observation], 

3390 ) 

3391 

3392 def test_query_projection_drop_postprocessing(self) -> None: 

3393 """Test that projections and deduplications on query objects can 

3394 drop post-query region filtering to ensure the query remains in 

3395 the SQL engine. 

3396 """ 

3397 registry = self.makeRegistry() 

3398 self.loadData(registry, "base.yaml") 

3399 self.loadData(registry, "spatial.yaml") 

3400 

3401 def pop_transfer(tree: Relation) -> Relation: 

3402 """If a relation tree terminates with a transfer to a new engine, 

3403 return the relation prior to that transfer. If not, return the 

3404 original relation. 

3405 """ 

3406 match tree: 

3407 case Transfer(target=target): 

3408 return target 

3409 case _: 

3410 return tree 

3411 

3412 # There's no public way to get a Query object yet, so we get one from a 

3413 # DataCoordinateQueryResults private attribute. When a public API is 

3414 # available this test should use it. 

3415 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query 

3416 # We expect this query to terminate in the iteration engine originally, 

3417 # because region-filtering is necessary. 

3418 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine) 

3419 # If we deduplicate, we usually have to do that downstream of the 

3420 # filtering. That means the deduplication has to happen in the 

3421 # iteration engine. 

3422 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine) 

3423 # If we pass drop_postprocessing, we instead drop the region filtering 

3424 # so the deduplication can happen in SQL (though there might still be 

3425 # a transfer to iteration at the tail of the tree that we can ignore; 

3426 # that's what the pop_transfer takes care of here). 

3427 self.assertIsInstance( 

3428 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine, 

3429 sql.Engine, 

3430 ) 

3431 

3432 def test_query_find_datasets_drop_postprocessing(self) -> None: 

3433 """Test that DataCoordinateQueryResults.findDatasets avoids commutator 

3434 problems with the FindFirstDataset relation operation. 

3435 """ 

3436 # Setup: load some visit, tract, and patch records, and insert two 

3437 # datasets with dimensions {visit, patch}, with one in each of two 

3438 # RUN collections. 

3439 registry = self.makeRegistry() 

3440 self.loadData(registry, "base.yaml") 

3441 self.loadData(registry, "spatial.yaml") 

3442 storage_class = StorageClass("Warpy") 

3443 registry.storageClasses.registerStorageClass(storage_class) 

3444 dataset_type = DatasetType( 

3445 "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions 

3446 ) 

3447 registry.registerDatasetType(dataset_type) 

3448 (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1) 

3449 registry.registerRun("run1") 

3450 registry.registerRun("run2") 

3451 (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1") 

3452 (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2") 

3453 # Query for the dataset using queryDataIds(...).findDatasets(...) 

3454 # against only one of the two collections. This should work even 

3455 # though the relation returned by queryDataIds ends with 

3456 # iteration-engine region-filtering, because we can recognize before 

3457 # running the query that there is only one collection to search and 

3458 # hence the (default) findFirst=True is irrelevant, and joining in the 

3459 # dataset query commutes past the iteration-engine postprocessing. 

3460 query1 = registry.queryDataIds( 

3461 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"] 

3462 ) 

3463 self.assertEqual( 

3464 set(query1.findDatasets(dataset_type.name, collections=["run1"])), 

3465 {ref1}, 

3466 ) 

3467 # Query for the dataset using queryDataIds(...).findDatasets(...) 

3468 # against both collections. This can only work if the FindFirstDataset 

3469 # operation can be commuted past the iteration-engine postprocessing into SQL. 

3470 query2 = registry.queryDataIds( 

3471 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"] 

3472 ) 

3473 self.assertEqual( 

3474 set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])), 

3475 {ref2}, 

3476 ) 

3477 

3478 def test_query_empty_collections(self) -> None: 

3479 """Test for registry query methods with empty collections. The methods 

3480 should return an empty result set (or None when applicable) and provide 

3481 "doomed" diagnostics. 

3482 """ 

3483 registry = self.makeRegistry() 

3484 self.loadData(registry, "base.yaml") 

3485 self.loadData(registry, "datasets.yaml") 

3486 

3487 # Tests for registry.findDataset() 

3488 with self.assertRaises(NoDefaultCollectionError): 

3489 registry.findDataset("bias", instrument="Cam1", detector=1) 

3490 self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...)) 

3491 self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[])) 

3492 

3493 # Tests for registry.queryDatasets() 

3494 with self.assertRaises(NoDefaultCollectionError): 

3495 registry.queryDatasets("bias") 

3496 self.assertTrue(list(registry.queryDatasets("bias", collections=...))) 

3497 

3498 result = registry.queryDatasets("bias", collections=[]) 

3499 self.assertEqual(len(list(result)), 0) 

3500 messages = list(result.explain_no_results()) 

3501 self.assertTrue(messages) 

3502 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3503 

3504 # Tests for registry.queryDataIds() 

3505 with self.assertRaises(NoDefaultCollectionError): 

3506 registry.queryDataIds("detector", datasets="bias") 

3507 self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...))) 

3508 

3509 result = registry.queryDataIds("detector", datasets="bias", collections=[]) 

3510 self.assertEqual(len(list(result)), 0) 

3511 messages = list(result.explain_no_results()) 

3512 self.assertTrue(messages) 

3513 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3514 

3515 # Tests for registry.queryDimensionRecords() 

3516 with self.assertRaises(NoDefaultCollectionError): 

3517 registry.queryDimensionRecords("detector", datasets="bias") 

3518 self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...))) 

3519 

3520 result = registry.queryDimensionRecords("detector", datasets="bias", collections=[]) 

3521 self.assertEqual(len(list(result)), 0) 

3522 messages = list(result.explain_no_results()) 

3523 self.assertTrue(messages) 

3524 self.assertTrue(any("because collection list is empty" in message for message in messages))
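        # Summary of the collection-argument semantics exercised above:
        #   collections=...  search all collections.
        #   collections=[]   search nothing: an empty, "doomed" result with
        #                    explain_no_results() diagnostics.
        #   omitted          fall back to registry defaults, raising
        #                    NoDefaultCollectionError when there are none.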