
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Iterator
from datetime import datetime, timedelta
from typing import TYPE_CHECKING

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetIdFactory,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    SkyPixDimension,
    StorageClass,
    Timespan,
    ddl,
)
from .._collection_summary import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from .._registry import Registry

class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: str | None = None
    """Name of the collections manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: str | dict[str, str] | None = None
    """Name or configuration dictionary of the datasets manager class.  If a
    subclass provides a value for this member, it overrides the name specified
    in the default configuration (`str` or `dict`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a `RegistryConfig` used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config
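
    # Note: the tuple-style keys used above are daf_butler ``Config``
    # hierarchical access - ``config["managers", "collections"]`` addresses
    # the nested ``collections`` entry under ``managers``, so a subclass
    # override replaces just that one manager in the default configuration.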

    @abstractmethod
    def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry | None:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with` is
            not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()
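
    # A minimal sketch of a concrete subclass, assuming an in-memory SQLite
    # registry; the ``createFromConfig`` call and the data-directory layout
    # here are illustrative assumptions, not requirements of this base class:
    #
    #     class InMemorySqliteRegistryTests(RegistryTests, unittest.TestCase):
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data", "registry")
    #
    #         def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry | None:
    #             if share_repo_with is not None:
    #                 return None  # in-memory SQLite DBs cannot be shared
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"
    #             return Registry.createFromConfig(config)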

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters.  SQLite says the limit is 32k, but it looks
        # like it is much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, the second has matching elements in different batches
        # (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))
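
    # A minimal sketch of the IN-clause batching behavior exercised above,
    # assuming a fixed batch size of 1000; the real machinery lives in the
    # registry database layer, and this helper is purely illustrative:
    #
    #     def batched_in_clauses(column: str, values: list, batch: int = 1000):
    #         # Deduplicate and sort so each value lands in exactly one batch;
    #         # the resulting fragments are ORed together in the final query,
    #         # which is why duplicates and values sorted into different
    #         # batches must still match correctly.
    #         unique = sorted(set(values))
    #         for start in range(0, len(unique), batch):
    #             chunk = unique[start : start + batch]
    #             yield f"{column} IN ({', '.join(repr(v) for v in chunk)})"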

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )
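
    # To summarize the semantics exercised above:
    #   - insertDimensionData raises (IntegrityError) if a record with the
    #     same primary key already exists;
    #   - syncDimensionData returns True when it inserts, False when an
    #     identical record is already present, and raises
    #     ConflictingDefinitionError when the primary key matches but other
    #     fields differ.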

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, in which two have the right
        # dataset type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets-uuid.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset ID."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All the different failure modes
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test non-unique IDs; they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make a dataset ref with a reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
                self.assertEqual(ref1.id, ref.id)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to a different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref])
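
    # The reproducible IDs asserted above are version-5 (name-based) UUIDs.
    # A minimal sketch of how such an ID could be derived with the standard
    # library alone; the namespace constant and payload format here are
    # illustrative assumptions, not the actual DatasetIdFactory algorithm:
    #
    #     _NS = uuid.uuid5(uuid.NAMESPACE_DNS, "example.org/daf_butler")  # made up
    #
    #     def reproducible_id(dataset_type: str, data_id: dict, run: str | None) -> uuid.UUID:
    #         # DATAID_TYPE mode would omit ``run``; DATAID_TYPE_RUN includes
    #         # it, which is why only the latter can be imported into a new
    #         # run without a conflict.
    #         payload = ",".join([dataset_type, *(f"{k}={v}" for k, v in sorted(data_id.items()))])
    #         if run is not None:
    #             payload += f",run={run}"
    #         return uuid.uuid5(_NS, payload)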

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset type is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "flat", "bias.wcs", "flat.photoCalib"},
                NamedValueSet(registry.queryDatasetTypes(components=True)).names,
            )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
            )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
            )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
            )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={
                "data1": registry.storageClasses.getStorageClass("StructuredDataDict"),
                "data2": registry.storageClasses.getStorageClass("StructuredDataDict"),
            },
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertWarns(FutureWarning):
            with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
                everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])
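
    # Component dataset type names used above are the parent name and the
    # component name joined with a period; `DatasetType` provides helpers for
    # both directions, e.g.:
    #
    #     DatasetType.nameWithComponent("bias", "wcs")   # -> "bias.wcs"
    #     DatasetType.splitDatasetTypeName("bias.wcs")   # -> ("bias", "wcs")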

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        with self.assertWarns(FutureWarning):
            dataIds = registry.queryDataIds(
                ["detector"],
                datasets=["bias.wcs"],
                collections=collection,
            ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        with self.assertWarns(FutureWarning):
            childRefs2 = set(
                registry.queryDatasets(
                    "bias.wcs",
                    collections=collection,
                )
            )
        self.assertEqual({ref.datasetType for ref in childRefs2}, {childType})
        self.assertEqual({ref.dataId for ref in childRefs2}, set(dataIds))

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run1.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)
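
    # The chain semantics exercised above, in brief: a CHAINED collection is
    # an ordered search path, and findDataset returns the first match found
    # while walking that path depth-first.  For the final configuration
    # above, a lookup in chain2 proceeds roughly as:
    #
    #     chain2 = [run2, chain1]          # chain1 = [tag1, run2]
    #     search order: run2, tag1, run2   # duplicates are harmless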

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)
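
    # The savepoint behavior above corresponds to the standard SQL pattern
    # (sketched here for reference):
    #
    #     BEGIN;
    #       INSERT ...;                    -- dataId1, survives
    #       SAVEPOINT sp1;
    #         INSERT ...;                  -- dataId2
    #         INSERT ...;                  -- dataId1 again -> integrity error
    #       ROLLBACK TO SAVEPOINT sp1;     -- undoes only dataId2
    #     COMMIT;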

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap.
        """
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for the test.
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2;
                # 100 has different datasets in the different collections,
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dim string works as well as a list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # limit to a single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # a more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter; it is not in the dimensions, but it is a
        # part of the full expression, so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

1138 def testSkyMapDimensions(self): 

1139 """Tests involving only skymap dimensions, no joins to instrument.""" 

1140 registry = self.makeRegistry() 

1141 

1142 # need a bunch of dimensions and datasets for test, we want 

1143 # "band" in the test so also have to add physical_filter 

1144 # dimensions 

1145 registry.insertDimensionData("instrument", dict(instrument="DummyCam")) 

1146 registry.insertDimensionData( 

1147 "physical_filter", 

1148 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1149 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1150 ) 

1151 registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!")) 

1152 for tract in range(10): 

1153 registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract)) 

1154 registry.insertDimensionData( 

1155 "patch", 

1156 *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)], 

1157 ) 

1158 

1159 # dataset types 

1160 run = "tésτ" 

1161 registry.registerRun(run) 

1162 storageClass = StorageClass("testDataset") 

1163 registry.storageClasses.registerStorageClass(storageClass) 

1164 calexpType = DatasetType( 

1165 name="deepCoadd_calexp", 

1166 dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")), 

1167 storageClass=storageClass, 

1168 ) 

1169 registry.registerDatasetType(calexpType) 

1170 mergeType = DatasetType( 

1171 name="deepCoadd_mergeDet", 

1172 dimensions=registry.dimensions.extract(("skymap", "tract", "patch")), 

1173 storageClass=storageClass, 

1174 ) 

1175 registry.registerDatasetType(mergeType) 

1176 measType = DatasetType( 

1177 name="deepCoadd_meas", 

1178 dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")), 

1179 storageClass=storageClass, 

1180 ) 

1181 registry.registerDatasetType(measType) 

1182 

1183 dimensions = DimensionGraph( 

1184 registry.dimensions, 

1185 dimensions=( 

1186 calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required 

1187 ), 

1188 ) 

1189 

1190 # add pre-existing datasets 

1191 for tract in (1, 3, 5): 

1192 for patch in (2, 4, 6, 7): 

1193 dataId = dict(skymap="DummyMap", tract=tract, patch=patch) 

1194 registry.insertDatasets(mergeType, dataIds=[dataId], run=run) 

1195 for aFilter in ("i", "r"): 

1196 dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter) 

1197 registry.insertDatasets(calexpType, dataIds=[dataId], run=run) 

1198 

1199 # with empty expression 

1200 rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet() 

1201 self.assertEqual(len(rows), 3 * 4 * 2) # 4 tracts x 4 patches x 2 filters 

1202 for dataId in rows: 

1203 self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band")) 

1204 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5)) 

1205 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7)) 

1206 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r")) 

1207 

1208 # limit to 2 tracts and 2 patches 

1209 rows = registry.queryDataIds( 

1210 dimensions, 

1211 datasets=[calexpType, mergeType], 

1212 collections=run, 

1213 where="tract IN (1, 5) AND patch IN (2, 7)", 

1214 skymap="DummyMap", 

1215 ).toSet() 

1216 self.assertEqual(len(rows), 2 * 2 * 2) # 2 tracts x 2 patches x 2 filters 

1217 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5)) 

1218 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7)) 

1219 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r")) 

1220 

1221 # limit to single filter 

1222 rows = registry.queryDataIds( 

1223 dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'" 

1224 ).toSet() 

1225 self.assertEqual(len(rows), 3 * 4 * 1) # 3 tracts x 4 patches x 1 filter 

1226 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5)) 

1227 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7)) 

1228 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",)) 

1229 

1230 # Specifying a non-existent skymap raises an exception. 

1231 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

1232 rows = registry.queryDataIds( 

1233 dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'" 

1234 ).toSet() 

1235 

1236 def testSpatialJoin(self): 

1237 """Test queries that involve spatial overlap joins.""" 

1238 registry = self.makeRegistry() 

1239 self.loadData(registry, "hsc-rc2-subset.yaml") 

1240 

1241 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of 

1242 # the TopologicalFamily they belong to. We'll relate all elements in 

1243 # each family to all of the elements in each other family. 

1244 families = defaultdict(set) 

1245 # Dictionary of {element.name: {dataId: region}}. 

1246 regions = {} 

1247 for element in registry.dimensions.getDatabaseElements(): 

1248 if element.spatial is not None: 

1249 families[element.spatial.name].add(element) 

1250 regions[element.name] = { 

1251 record.dataId: record.region for record in registry.queryDimensionRecords(element) 

1252 } 

1253 

1254 # If this check fails, it's not necessarily a problem - it may just be 

1255 # a reasonable change to the default dimension definitions - but the 

1256 # test below depends on there being more than one family to do anything 

1257 # useful. 

1258 self.assertEqual(len(families), 2) 

1259 

1260 # Overlap DatabaseDimensionElements with each other. 

1261 for family1, family2 in itertools.combinations(families, 2): 

1262 for element1, element2 in itertools.product(families[family1], families[family2]): 

1263 graph = DimensionGraph.union(element1.graph, element2.graph) 

1264 # Construct expected set of overlapping data IDs via a 

1265 # brute-force comparison of the regions we've already fetched. 

1266 expected = { 

1267 DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph) 

1268 for (dataId1, region1), (dataId2, region2) in itertools.product( 

1269 regions[element1.name].items(), regions[element2.name].items() 

1270 ) 

1271 if not region1.isDisjointFrom(region2) 

1272 } 

1273 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1274 queried = set(registry.queryDataIds(graph)) 

1275 self.assertEqual(expected, queried) 

1276 

1277 # Overlap each DatabaseDimensionElement with the commonSkyPix system. 

1278 commonSkyPix = registry.dimensions.commonSkyPix 

1279 for elementName, these_regions in regions.items(): 

1280 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph) 

1281 expected = set() 

1282 for dataId, region in these_regions.items(): 

1283 for begin, end in commonSkyPix.pixelization.envelope(region): 

1284 expected.update( 

1285 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph) 

1286 for index in range(begin, end) 

1287 ) 

1288 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1289 queried = set(registry.queryDataIds(graph)) 

1290 self.assertEqual(expected, queried) 

1291 
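# Illustrative sketch of how the expected commonSkyPix overlaps above are
# derived (uses only lsst.sphgeom, already imported in this module):
# ``envelope`` returns a RangeSet of half-open [begin, end) pixel-index
# ranges that may intersect a region, so expanding those ranges yields the
# candidate pixel indices.
def _sketch_envelope_indices(pixelization, region) -> list[int]:
    # Upper bounds are exclusive, matching the loops in testSpatialJoin.
    return [
        index
        for begin, end in pixelization.envelope(region)
        for index in range(begin, end)
    ]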

1292 def testAbstractQuery(self): 

1293 """Test that we can run a query that just lists the known 

1294 bands. This is tricky because band is 

1295 backed by a query against physical_filter. 

1296 """ 

1297 registry = self.makeRegistry() 

1298 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1299 registry.insertDimensionData( 

1300 "physical_filter", 

1301 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1302 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1303 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1304 ) 

1305 rows = registry.queryDataIds(["band"]).toSet() 

1306 self.assertCountEqual( 

1307 rows, 

1308 [ 

1309 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1310 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1311 ], 

1312 ) 

1313 
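# Illustrative sketch of why the query above is "tricky": band has no
# table of its own, so its distinct values come from projecting the
# physical_filter records.  A plain-Python stand-in (hypothetical, not
# registry internals) mirroring the records inserted above:
def _sketch_distinct_bands(physical_filter_records: list[dict]) -> list[str]:
    # e.g. [{"name": "dummy_i", "band": "i"}, {"name": "dummy_i2", "band": "i"},
    #       {"name": "dummy_r", "band": "r"}] -> ["i", "r"]
    return sorted({record["band"] for record in physical_filter_records})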

1314 def testAttributeManager(self): 

1315 """Test basic functionality of the attribute manager.""" 

1316 # number of attributes with schema versions in a fresh database, 

1317 # 6 managers with 2 records per manager, plus config for dimensions 

1318 VERSION_COUNT = 6 * 2 + 1 

1319 

1320 registry = self.makeRegistry() 

1321 attributes = registry._managers.attributes 

1322 

1323 # check what get() returns for non-existing key 

1324 self.assertIsNone(attributes.get("attr")) 

1325 self.assertEqual(attributes.get("attr", ""), "") 

1326 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1327 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1328 

1329 # cannot store empty key or value 

1330 with self.assertRaises(ValueError): 

1331 attributes.set("", "value") 

1332 with self.assertRaises(ValueError): 

1333 attributes.set("attr", "") 

1334 

1335 # set value of non-existing key 

1336 attributes.set("attr", "value") 

1337 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1338 self.assertEqual(attributes.get("attr"), "value") 

1339 

1340 # update value of existing key 

1341 with self.assertRaises(ButlerAttributeExistsError): 

1342 attributes.set("attr", "value2") 

1343 

1344 attributes.set("attr", "value2", force=True) 

1345 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1346 self.assertEqual(attributes.get("attr"), "value2") 

1347 

1348 # delete existing key 

1349 self.assertTrue(attributes.delete("attr")) 

1350 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1351 

1352 # delete non-existing key 

1353 self.assertFalse(attributes.delete("non-attr")) 

1354 

1355 # store a bunch of keys and get the list back 

1356 data = [ 

1357 ("version.core", "1.2.3"), 

1358 ("version.dimensions", "3.2.1"), 

1359 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1360 ] 

1361 for key, value in data: 

1362 attributes.set(key, value) 

1363 items = dict(attributes.items()) 

1364 for key, value in data: 

1365 self.assertEqual(items[key], value) 

1366 
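# A minimal in-memory sketch of the attributes-manager contract exercised
# above (illustrative only; the real manager is backed by a database table;
# ButlerAttributeExistsError is imported at the top of this module):
class _SketchAttributes:
    def __init__(self) -> None:
        self._data: dict[str, str] = {}

    def get(self, key: str, default: str | None = None) -> str | None:
        return self._data.get(key, default)

    def set(self, key: str, value: str, *, force: bool = False) -> None:
        if not key or not value:
            raise ValueError("Empty keys and values are rejected.")
        if key in self._data and not force:
            raise ButlerAttributeExistsError(f"attribute {key!r} already exists")
        self._data[key] = value

    def delete(self, key: str) -> bool:
        # Report whether the key existed, mirroring the assertions above.
        return self._data.pop(key, None) is not None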

1367 def testQueryDatasetsDeduplication(self): 

1368 """Test that the findFirst option to queryDatasets selects datasets 

1369 from collections in the order given. 

1370 """ 

1371 registry = self.makeRegistry() 

1372 self.loadData(registry, "base.yaml") 

1373 self.loadData(registry, "datasets.yaml") 

1374 self.assertCountEqual( 

1375 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1376 [ 

1377 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1378 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1379 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1380 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1381 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1382 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1383 ], 

1384 ) 

1385 self.assertCountEqual( 

1386 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1387 [ 

1388 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1389 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1390 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1391 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1392 ], 

1393 ) 

1394 self.assertCountEqual( 

1395 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1396 [ 

1397 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1398 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1399 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1400 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1401 ], 

1402 ) 

1403 
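# Illustrative sketch (hypothetical, not registry internals) of the
# find-first rule tested above: for each data ID, keep the dataset from
# the first collection, in the order given, that contains one.
def _sketch_find_first(
    per_collection: dict[str, dict[int, str]], collections: list[str]
) -> dict[int, str]:
    # e.g. ({"g": {1: "b1g", 2: "b2g"}, "r": {2: "b2r", 4: "b4r"}}, ["g", "r"])
    # -> {1: "b1g", 2: "b2g", 4: "b4r"}
    result: dict[int, str] = {}
    for collection in collections:
        for data_id, dataset in per_collection.get(collection, {}).items():
            result.setdefault(data_id, dataset)  # earlier collections win
    return result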

1404 def testQueryResults(self): 

1405 """Test querying for data IDs and then manipulating the QueryResults 

1406 object returned to perform other queries. 

1407 """ 

1408 registry = self.makeRegistry() 

1409 self.loadData(registry, "base.yaml") 

1410 self.loadData(registry, "datasets.yaml") 

1411 bias = registry.getDatasetType("bias") 

1412 flat = registry.getDatasetType("flat") 

1413 # Obtain expected results from methods other than those we're testing 

1414 # here. That includes: 

1415 # - the dimensions of the data IDs we want to query: 

1416 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1417 # - the dimensions of some other data IDs we'll extract from that: 

1418 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1419 # - the data IDs we expect to obtain from the first queries: 

1420 expectedDataIds = DataCoordinateSet( 

1421 { 

1422 DataCoordinate.standardize( 

1423 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1424 ) 

1425 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1426 }, 

1427 graph=expectedGraph, 

1428 hasFull=False, 

1429 hasRecords=False, 

1430 ) 

1431 # - the flat datasets we expect to find from those data IDs, in just 

1432 # one collection (so deduplication is irrelevant): 

1433 expectedFlats = [ 

1434 registry.findDataset( 

1435 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1436 ), 

1437 registry.findDataset( 

1438 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1439 ), 

1440 registry.findDataset( 

1441 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1442 ), 

1443 ] 

1444 # - the data IDs we expect to extract from that: 

1445 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1446 # - the bias datasets we expect to find from those data IDs, after we 

1447 # subset out the physical_filter dimension, first with duplicates: 

1448 expectedAllBiases = [ 

1449 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1450 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1451 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1452 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1453 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1454 ] 

1455 # - ...and without duplicates: 

1456 expectedDeduplicatedBiases = [ 

1457 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1458 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1459 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1460 ] 

1461 # Test against those expected results, using a "lazy" query for the 

1462 # data IDs (which re-executes that query each time we use it to do 

1463 # something new). 

1464 dataIds = registry.queryDataIds( 

1465 ["detector", "physical_filter"], 

1466 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1467 instrument="Cam1", 

1468 ) 

1469 self.assertEqual(dataIds.graph, expectedGraph) 

1470 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1471 self.assertCountEqual( 

1472 list( 

1473 dataIds.findDatasets( 

1474 flat, 

1475 collections=["imported_r"], 

1476 ) 

1477 ), 

1478 expectedFlats, 

1479 ) 

1480 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1481 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1482 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1483 self.assertCountEqual( 

1484 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1485 expectedAllBiases, 

1486 ) 

1487 self.assertCountEqual( 

1488 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1489 expectedDeduplicatedBiases, 

1490 ) 

1491 

1492 # Check dimensions match. 

1493 with self.assertRaises(ValueError): 

1494 subsetDataIds.findDatasets("flat", collections=["imported_r", "imported_g"], findFirst=True) 

1495 

1496 # Use a component dataset type. 

1497 self.assertCountEqual( 

1498 [ 

1499 ref.makeComponentRef("image") 

1500 for ref in subsetDataIds.findDatasets( 

1501 bias, 

1502 collections=["imported_r", "imported_g"], 

1503 findFirst=False, 

1504 ) 

1505 ], 

1506 [ref.makeComponentRef("image") for ref in expectedAllBiases], 

1507 ) 

1508 

1509 # Use the name of a dataset type that does not exist and a dataset 

1510 # type object that is not registered. 

1511 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure") 

1512 

1513 # Test both string name and dataset type object. 

1514 test_type: str | DatasetType 

1515 for test_type, test_type_name in ( 

1516 (unknown_type, unknown_type.name), 

1517 (unknown_type.name, unknown_type.name), 

1518 ): 

1519 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name): 

1520 list( 

1521 subsetDataIds.findDatasets( 

1522 test_type, collections=["imported_r", "imported_g"], findFirst=True 

1523 ) 

1524 ) 

1525 

1526 # Materialize the bias dataset queries (only) by putting the results 

1527 # into temporary tables, then repeat those tests. 

1528 with subsetDataIds.findDatasets( 

1529 bias, collections=["imported_r", "imported_g"], findFirst=False 

1530 ).materialize() as biases: 

1531 self.assertCountEqual(list(biases), expectedAllBiases) 

1532 with subsetDataIds.findDatasets( 

1533 bias, collections=["imported_r", "imported_g"], findFirst=True 

1534 ).materialize() as biases: 

1535 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1536 # Materialize the data ID subset query, but not the dataset queries. 

1537 with subsetDataIds.materialize() as subsetDataIds: 

1538 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1539 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1540 self.assertCountEqual( 

1541 list( 

1542 subsetDataIds.findDatasets( 

1543 bias, collections=["imported_r", "imported_g"], findFirst=False 

1544 ) 

1545 ), 

1546 expectedAllBiases, 

1547 ) 

1548 self.assertCountEqual( 

1549 list( 

1550 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1551 ), 

1552 expectedDeduplicatedBiases, 

1553 ) 

1554 # Materialize the dataset queries, too. 

1555 with subsetDataIds.findDatasets( 

1556 bias, collections=["imported_r", "imported_g"], findFirst=False 

1557 ).materialize() as biases: 

1558 self.assertCountEqual(list(biases), expectedAllBiases) 

1559 with subsetDataIds.findDatasets( 

1560 bias, collections=["imported_r", "imported_g"], findFirst=True 

1561 ).materialize() as biases: 

1562 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1563 # Materialize the original query, but none of the follow-up queries. 

1564 with dataIds.materialize() as dataIds: 

1565 self.assertEqual(dataIds.graph, expectedGraph) 

1566 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1567 self.assertCountEqual( 

1568 list( 

1569 dataIds.findDatasets( 

1570 flat, 

1571 collections=["imported_r"], 

1572 ) 

1573 ), 

1574 expectedFlats, 

1575 ) 

1576 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1577 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1578 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1579 self.assertCountEqual( 

1580 list( 

1581 subsetDataIds.findDatasets( 

1582 bias, collections=["imported_r", "imported_g"], findFirst=False 

1583 ) 

1584 ), 

1585 expectedAllBiases, 

1586 ) 

1587 self.assertCountEqual( 

1588 list( 

1589 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1590 ), 

1591 expectedDeduplicatedBiases, 

1592 ) 

1593 # Materialize just the bias dataset queries. 

1594 with subsetDataIds.findDatasets( 

1595 bias, collections=["imported_r", "imported_g"], findFirst=False 

1596 ).materialize() as biases: 

1597 self.assertCountEqual(list(biases), expectedAllBiases) 

1598 with subsetDataIds.findDatasets( 

1599 bias, collections=["imported_r", "imported_g"], findFirst=True 

1600 ).materialize() as biases: 

1601 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1602 # Materialize the subset data ID query, but not the dataset 

1603 # queries. 

1604 with subsetDataIds.materialize() as subsetDataIds: 

1605 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1606 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1607 self.assertCountEqual( 

1608 list( 

1609 subsetDataIds.findDatasets( 

1610 bias, collections=["imported_r", "imported_g"], findFirst=False 

1611 ) 

1612 ), 

1613 expectedAllBiases, 

1614 ) 

1615 self.assertCountEqual( 

1616 list( 

1617 subsetDataIds.findDatasets( 

1618 bias, collections=["imported_r", "imported_g"], findFirst=True 

1619 ) 

1620 ), 

1621 expectedDeduplicatedBiases, 

1622 ) 

1623 # Materialize the bias dataset queries, too, so now we're 

1624 # materializing every single step. 

1625 with subsetDataIds.findDatasets( 

1626 bias, collections=["imported_r", "imported_g"], findFirst=False 

1627 ).materialize() as biases: 

1628 self.assertCountEqual(list(biases), expectedAllBiases) 

1629 with subsetDataIds.findDatasets( 

1630 bias, collections=["imported_r", "imported_g"], findFirst=True 

1631 ).materialize() as biases: 

1632 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1633 
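# A note on the materialize() pattern used heavily above: it is a context
# manager that executes the query once into a temporary table and yields
# an equivalent results object backed by that table, so follow-up
# operations (toSet, findDatasets, subset) reuse the stored rows instead
# of re-running the original query.  Usage shape only:
#
#     with query_results.materialize() as materialized:
#         ...  # iterate or refine without re-executing the base query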

1634 def testStorageClassPropagation(self): 

1635 """Test that queries for datasets respect the storage class passed in 

1636 as part of a full dataset type. 

1637 """ 

1638 registry = self.makeRegistry() 

1639 self.loadData(registry, "base.yaml") 

1640 dataset_type_in_registry = DatasetType( 

1641 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions 

1642 ) 

1643 registry.registerDatasetType(dataset_type_in_registry) 

1644 run = "run1" 

1645 registry.registerRun(run) 

1646 (inserted_ref,) = registry.insertDatasets( 

1647 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run 

1648 ) 

1649 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry) 

1650 query_dataset_type = DatasetType( 

1651 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions 

1652 ) 

1653 self.assertNotEqual(dataset_type_in_registry, query_dataset_type) 

1654 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run]) 

1655 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore 

1656 (query_datasets_ref,) = query_datasets_result 

1657 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type) 

1658 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets( 

1659 query_dataset_type, collections=[run] 

1660 ) 

1661 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type) 

1662 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result 

1663 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type) 

1664 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type) 

1665 self.assertEqual(list(query_dataset_types_result), [query_dataset_type]) 

1666 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run]) 

1667 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type) 

1668 

1669 def testEmptyDimensionsQueries(self): 

1670 """Test Query and QueryResults objects in the case where there are no 

1671 dimensions. 

1672 """ 

1673 # Set up test data: one dataset type, two runs, one dataset in each. 

1674 registry = self.makeRegistry() 

1675 self.loadData(registry, "base.yaml") 

1676 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1677 registry.registerDatasetType(schema) 

1678 dataId = DataCoordinate.makeEmpty(registry.dimensions) 

1679 run1 = "run1" 

1680 run2 = "run2" 

1681 registry.registerRun(run1) 

1682 registry.registerRun(run2) 

1683 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1684 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1685 # Query directly for both of the datasets, then for each one individually via findFirst. 

1686 self.checkQueryResults( 

1687 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1688 ) 

1689 self.checkQueryResults( 

1690 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1691 [dataset1], 

1692 ) 

1693 self.checkQueryResults( 

1694 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1695 [dataset2], 

1696 ) 

1697 # Query for data IDs with no dimensions. 

1698 dataIds = registry.queryDataIds([]) 

1699 self.checkQueryResults(dataIds, [dataId]) 

1700 # Use queried data IDs to find the datasets. 

1701 self.checkQueryResults( 

1702 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1703 [dataset1, dataset2], 

1704 ) 

1705 self.checkQueryResults( 

1706 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1707 [dataset1], 

1708 ) 

1709 self.checkQueryResults( 

1710 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1711 [dataset2], 

1712 ) 

1713 # Now materialize the data ID query results and repeat those tests. 

1714 with dataIds.materialize() as dataIds: 

1715 self.checkQueryResults(dataIds, [dataId]) 

1716 self.checkQueryResults( 

1717 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1718 [dataset1], 

1719 ) 

1720 self.checkQueryResults( 

1721 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1722 [dataset2], 

1723 ) 

1724 # Query for non-empty data IDs, then subset that to get the empty one. 

1725 # Repeat the above tests starting from that. 

1726 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1727 self.checkQueryResults(dataIds, [dataId]) 

1728 self.checkQueryResults( 

1729 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1730 [dataset1, dataset2], 

1731 ) 

1732 self.checkQueryResults( 

1733 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1734 [dataset1], 

1735 ) 

1736 self.checkQueryResults( 

1737 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1738 [dataset2], 

1739 ) 

1740 with dataIds.materialize() as dataIds: 

1741 self.checkQueryResults(dataIds, [dataId]) 

1742 self.checkQueryResults( 

1743 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1744 [dataset1, dataset2], 

1745 ) 

1746 self.checkQueryResults( 

1747 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1748 [dataset1], 

1749 ) 

1750 self.checkQueryResults( 

1751 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1752 [dataset2], 

1753 ) 

1754 # Query for non-empty data IDs, then materialize, then subset to get 

1755 # the empty one. Repeat again. 

1756 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1757 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1758 self.checkQueryResults(dataIds, [dataId]) 

1759 self.checkQueryResults( 

1760 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1761 [dataset1, dataset2], 

1762 ) 

1763 self.checkQueryResults( 

1764 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1765 [dataset1], 

1766 ) 

1767 self.checkQueryResults( 

1768 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1769 [dataset2], 

1770 ) 

1771 with dataIds.materialize() as dataIds: 

1772 self.checkQueryResults(dataIds, [dataId]) 

1773 self.checkQueryResults( 

1774 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1775 [dataset1, dataset2], 

1776 ) 

1777 self.checkQueryResults( 

1778 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1779 [dataset1], 

1780 ) 

1781 self.checkQueryResults( 

1782 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1783 [dataset2], 

1784 ) 

1785 # Query for non-empty data IDs with a constraint on an empty-data-ID 

1786 # dataset that exists. 

1787 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...) 

1788 self.checkQueryResults( 

1789 dataIds.subset(unique=True), 

1790 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)], 

1791 ) 

1792 # Again query for non-empty data IDs with a constraint on empty-data-ID 

1793 # datasets, but when the datasets don't exist. We delete the existing 

1794 # dataset and query just that collection rather than creating a new 

1795 # empty collection because this is a bit less likely for our build-time 

1796 # logic to shortcut-out (via the collection summaries), and such a 

1797 # shortcut would make this test a bit more trivial than we'd like. 

1798 registry.removeDatasets([dataset2]) 

1799 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2) 

1800 self.checkQueryResults(dataIds, []) 

1801 

1802 def testDimensionDataModifications(self): 

1803 """Test that modifying dimension records via 

1804 syncDimensionData(..., update=True) and 

1805 insertDimensionData(..., replace=True) works as expected, even in the 

1806 presence of datasets using those dimensions and spatial overlap 

1807 relationships. 

1808 """ 

1809 

1810 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1811 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1812 for begin, end in ranges: 

1813 yield from range(begin, end) 

1814 

1815 def range_set_hull( 

1816 ranges: lsst.sphgeom.RangeSet, 

1817 pixelization: lsst.sphgeom.HtmPixelization, 

1818 ) -> lsst.sphgeom.ConvexPolygon: 

1819 """Create a ConvexPolygon hull of the region defined by a set of 

1820 HTM pixelization index ranges. 

1821 """ 

1822 points = [] 

1823 for index in unpack_range_set(ranges): 

1824 points.extend(pixelization.triangle(index).getVertices()) 

1825 return lsst.sphgeom.ConvexPolygon(points) 

1826 
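# Illustrative example of the helpers above: scaling a one-trixel
# RangeSet by 4 yields the four child trixels at the next HTM level,
# which unpack_range_set expands into concrete indices.
example_children = list(unpack_range_set(lsst.sphgeom.RangeSet(12288).scaled(4)))
assert example_children == [49152, 49153, 49154, 49155]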

1827 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1828 # and four child regions (the trixels within the parent at the next 

1829 # level). We'll use the parent as a tract/visit region and the children 

1830 # as its patch/visit_detector regions. 

1831 registry = self.makeRegistry() 

1832 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1833 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1834 index = 12288 

1835 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 

1836 assert htm6.universe().contains(child_ranges_small) 

1837 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)] 

1838 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1839 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1840 ) 

1841 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1842 # Make a larger version of each child region, defined to be the set of 

1843 # htm6 trixels that overlap the original's bounding circle. Make a new 

1844 # parent that's the convex hull of the new children. 

1845 child_regions_large = [ 

1846 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1847 ] 

1848 assert all( 

1849 large.contains(small) 

1850 for large, small in zip(child_regions_large, child_regions_small, strict=True) 

1851 ) 

1852 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1853 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1854 ) 

1855 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1856 assert parent_region_large.contains(parent_region_small) 

1857 assert not parent_region_small.contains(parent_region_large) 

1858 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1859 # Find some commonSkyPix indices that overlap the large regions but do 

1860 # not overlap the small regions. We use commonSkyPix here to make sure the 

1861 # real tests later involve what's in the database, not just post-query 

1862 # filtering of regions. 

1863 child_difference_indices = [] 

1864 for large, small in zip(child_regions_large, child_regions_small, strict=True): 

1865 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1866 assert difference, "if this is empty, we can't test anything useful with these regions" 

1867 assert all( 

1868 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1869 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1870 for d in difference 

1871 ) 

1872 child_difference_indices.append(difference) 

1873 parent_difference_indices = list( 

1874 unpack_range_set( 

1875 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1876 ) 

1877 ) 

1878 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1879 assert all( 

1880 ( 

1881 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1882 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1883 ) 

1884 for d in parent_difference_indices 

1885 ) 

1886 # Now that we've finally got those regions, we'll insert the large ones 

1887 # as tract/patch dimension records. 

1888 skymap_name = "testing_v1" 

1889 registry.insertDimensionData( 

1890 "skymap", 

1891 { 

1892 "name": skymap_name, 

1893 "hash": bytes([42]), 

1894 "tract_max": 1, 

1895 "patch_nx_max": 2, 

1896 "patch_ny_max": 2, 

1897 }, 

1898 ) 

1899 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

1900 registry.insertDimensionData( 

1901 "patch", 

1902 *[ 

1903 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1904 for n, c in enumerate(child_regions_large) 

1905 ], 

1906 ) 

1907 # Add a dataset that uses these dimensions to make sure that modifying 

1908 # them doesn't disrupt foreign keys (need to make sure DB doesn't 

1909 # implement insert with replace=True as delete-then-insert). 

1910 dataset_type = DatasetType( 

1911 "coadd", 

1912 dimensions=["tract", "patch"], 

1913 universe=registry.dimensions, 

1914 storageClass="Exposure", 

1915 ) 

1916 registry.registerDatasetType(dataset_type) 

1917 registry.registerCollection("the_run", CollectionType.RUN) 

1918 registry.insertDatasets( 

1919 dataset_type, 

1920 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

1921 run="the_run", 

1922 ) 

1923 # Query for tracts and patches that overlap some "difference" 

1924 # commonSkyPix pixels; there should be overlaps, because the database has 

1925 # the "large" suite of regions. 

1926 self.assertEqual( 

1927 {0}, 

1928 { 

1929 data_id["tract"] 

1930 for data_id in registry.queryDataIds( 

1931 ["tract"], 

1932 skymap=skymap_name, 

1933 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1934 ) 

1935 }, 

1936 ) 

1937 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1938 self.assertIn( 

1939 patch_id, 

1940 { 

1941 data_id["patch"] 

1942 for data_id in registry.queryDataIds( 

1943 ["patch"], 

1944 skymap=skymap_name, 

1945 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1946 ) 

1947 }, 

1948 ) 

1949 # Use sync to update the tract region and insert to update the regions 

1950 # of the patches, to the "small" suite. 

1951 updated = registry.syncDimensionData( 

1952 "tract", 

1953 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

1954 update=True, 

1955 ) 

1956 self.assertEqual(updated, {"region": parent_region_large}) 

1957 registry.insertDimensionData( 

1958 "patch", 

1959 *[ 

1960 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1961 for n, c in enumerate(child_regions_small) 

1962 ], 

1963 replace=True, 

1964 ) 

1965 # Query again; there now should be no such overlaps, because the 

1966 # database has the "small" suite of regions. 

1967 self.assertFalse( 

1968 set( 

1969 registry.queryDataIds( 

1970 ["tract"], 

1971 skymap=skymap_name, 

1972 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1973 ) 

1974 ) 

1975 ) 

1976 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1977 self.assertNotIn( 

1978 patch_id, 

1979 { 

1980 data_id["patch"] 

1981 for data_id in registry.queryDataIds( 

1982 ["patch"], 

1983 skymap=skymap_name, 

1984 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1985 ) 

1986 }, 

1987 ) 

1988 # Update back to the large regions and query one more time. 

1989 updated = registry.syncDimensionData( 

1990 "tract", 

1991 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

1992 update=True, 

1993 ) 

1994 self.assertEqual(updated, {"region": parent_region_small}) 

1995 registry.insertDimensionData( 

1996 "patch", 

1997 *[ 

1998 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1999 for n, c in enumerate(child_regions_large) 

2000 ], 

2001 replace=True, 

2002 ) 

2003 self.assertEqual( 

2004 {0}, 

2005 { 

2006 data_id["tract"] 

2007 for data_id in registry.queryDataIds( 

2008 ["tract"], 

2009 skymap=skymap_name, 

2010 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

2011 ) 

2012 }, 

2013 ) 

2014 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

2015 self.assertIn( 

2016 patch_id, 

2017 { 

2018 data_id["patch"] 

2019 for data_id in registry.queryDataIds( 

2020 ["patch"], 

2021 skymap=skymap_name, 

2022 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

2023 ) 

2024 }, 

2025 ) 

2026 

2027 def testCalibrationCollections(self): 

2028 """Test operations on `~CollectionType.CALIBRATION` collections, 

2029 including `Registry.certify`, `Registry.decertify`, and 

2030 `Registry.findDataset`. 

2031 """ 

2032 # Setup - make a Registry, fill it with some datasets in 

2033 # non-calibration collections. 

2034 registry = self.makeRegistry() 

2035 self.loadData(registry, "base.yaml") 

2036 self.loadData(registry, "datasets.yaml") 

2037 # Set up some timestamps. 

2038 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

2039 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

2040 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

2041 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

2042 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

2043 allTimespans = [ 

2044 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

2045 ] 

2046 # Get references to some datasets. 

2047 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

2048 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

2049 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

2050 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

2051 # Register the main calibration collection we'll be working with. 

2052 collection = "Cam1/calibs/default" 

2053 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

2054 # Cannot associate into a calibration collection (no timespan). 

2055 with self.assertRaises(CollectionTypeError): 

2056 registry.associate(collection, [bias2a]) 

2057 # Certify 2a dataset with [t2, t4) validity. 

2058 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

2059 # Test that we can query for this dataset via the new collection, both 

2060 # on its own and with a RUN collection, as long as we don't try to join 

2061 # in temporal dimensions or use findFirst=True. 

2062 self.assertEqual( 

2063 set(registry.queryDatasets("bias", findFirst=False, collections=collection)), 

2064 {bias2a}, 

2065 ) 

2066 self.assertEqual( 

2067 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])), 

2068 { 

2069 bias2a, 

2070 bias2b, 

2071 bias3b, 

2072 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2073 }, 

2074 ) 

2075 self.assertEqual( 

2076 set(registry.queryDataIds("detector", datasets="bias", collections=collection)), 

2077 {registry.expandDataId(instrument="Cam1", detector=2)}, 

2078 ) 

2079 self.assertEqual( 

2080 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])), 

2081 { 

2082 registry.expandDataId(instrument="Cam1", detector=2), 

2083 registry.expandDataId(instrument="Cam1", detector=3), 

2084 registry.expandDataId(instrument="Cam1", detector=4), 

2085 }, 

2086 ) 

2087 

2088 # We should not be able to certify 2b with anything overlapping that 

2089 # window. 

2090 with self.assertRaises(ConflictingDefinitionError): 

2091 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

2092 with self.assertRaises(ConflictingDefinitionError): 

2093 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

2094 with self.assertRaises(ConflictingDefinitionError): 

2095 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

2096 with self.assertRaises(ConflictingDefinitionError): 

2097 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

2098 with self.assertRaises(ConflictingDefinitionError): 

2099 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

2100 with self.assertRaises(ConflictingDefinitionError): 

2101 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

2102 with self.assertRaises(ConflictingDefinitionError): 

2103 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

2104 with self.assertRaises(ConflictingDefinitionError): 

2105 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

2106 # We should be able to certify 3a with a range overlapping that window, 

2107 # because it's for a different detector. 

2108 # We'll certify 3a over [t1, t3). 

2109 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

2110 # Now we'll certify 2b and 3b together over [t4, ∞). 

2111 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

2112 

2113 # Fetch all associations and check that they are what we expect. 

2114 self.assertCountEqual( 

2115 list( 

2116 registry.queryDatasetAssociations( 

2117 "bias", 

2118 collections=[collection, "imported_g", "imported_r"], 

2119 ) 

2120 ), 

2121 [ 

2122 DatasetAssociation( 

2123 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

2124 collection="imported_g", 

2125 timespan=None, 

2126 ), 

2127 DatasetAssociation( 

2128 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2129 collection="imported_r", 

2130 timespan=None, 

2131 ), 

2132 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

2133 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

2134 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

2135 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

2136 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

2137 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

2138 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2139 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2140 ], 

2141 ) 

2142 

2143 class Ambiguous: 

2144 """Tag class to denote lookups that should be ambiguous.""" 

2145 

2146 pass 

2147 

2148 def assertLookup( 

2149 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None 

2150 ) -> None: 

2151 """Local function that asserts that a bias lookup returns the given 

2152 expected result. 

2153 """ 

2154 if expected is Ambiguous: 

2155 with self.assertRaises((DatasetTypeError, LookupError)): 

2156 registry.findDataset( 

2157 "bias", 

2158 collections=collection, 

2159 instrument="Cam1", 

2160 detector=detector, 

2161 timespan=timespan, 

2162 ) 

2163 else: 

2164 self.assertEqual( 

2165 expected, 

2166 registry.findDataset( 

2167 "bias", 

2168 collections=collection, 

2169 instrument="Cam1", 

2170 detector=detector, 

2171 timespan=timespan, 

2172 ), 

2173 ) 

2174 

2175 # Systematically test lookups against expected results. 

2176 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2177 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2178 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2179 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2180 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

2181 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2182 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2183 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2184 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2185 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

2186 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2187 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2188 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2189 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

2190 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2191 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

2192 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

2193 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

2194 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

2195 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2196 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2197 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2198 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2199 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2200 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2201 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

2202 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2203 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2204 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2205 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2206 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

2207 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2208 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2209 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2210 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2211 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2212 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2213 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2214 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2215 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2216 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2217 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2218 

2219 # Decertify [t3, t5) for all data IDs, and run the test lookups again. 

2220 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2221 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2222 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2223 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2224 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2225 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2226 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2227 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2228 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2229 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2230 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2231 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2232 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2233 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2234 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2235 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2236 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2237 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2238 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2239 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2240 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2241 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2242 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2243 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2244 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2245 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2246 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2247 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2248 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2249 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2250 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2251 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2252 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2253 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2254 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2255 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2256 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2257 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2258 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2259 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2260 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2261 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2262 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2263 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2264 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2265 

2266 # Decertify everything, this time with explicit data IDs, then check 

2267 # that no lookups succeed. 

2268 registry.decertify( 

2269 collection, 

2270 "bias", 

2271 Timespan(None, None), 

2272 dataIds=[ 

2273 dict(instrument="Cam1", detector=2), 

2274 dict(instrument="Cam1", detector=3), 

2275 ], 

2276 ) 

2277 for detector in (2, 3): 

2278 for timespan in allTimespans: 

2279 assertLookup(detector=detector, timespan=timespan, expected=None) 

2280 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2281 # those. 

2282 registry.certify( 

2283 collection, 

2284 [bias2a, bias3a], 

2285 Timespan(None, None), 

2286 ) 

2287 for timespan in allTimespans: 

2288 assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2289 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2290 # Decertify just bias2 over [t2, t4). 

2291 # This should split a single certification row into two (and leave the 

2292 # other existing row, for bias3a, alone). 

2293 registry.decertify( 

2294 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2295 ) 

2296 for timespan in allTimespans: 

2297 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2298 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2299 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2300 if overlapsBefore and overlapsAfter: 

2301 expected = Ambiguous 

2302 elif overlapsBefore or overlapsAfter: 

2303 expected = bias2a 

2304 else: 

2305 expected = None 

2306 assertLookup(detector=2, timespan=timespan, expected=expected) 

2307 
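# Illustrative sketch (hypothetical, not registry internals) of the
# interval arithmetic behind decertify, using plain comparable values in
# place of Timespan: removing one half-open window from a validity range
# can leave zero, one, or two surviving pieces, which is why decertifying
# [t2, t4) above splits a single certification row in two.
def _sketch_decertify_span(valid: tuple, removed: tuple) -> list[tuple]:
    # e.g. ((0, 10), (2, 4)) -> [(0, 2), (4, 10)]
    pieces = [
        (valid[0], min(removed[0], valid[1])),
        (max(removed[1], valid[0]), valid[1]),
    ]
    return [(begin, end) for begin, end in pieces if begin < end]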

2308 def testSkipCalibs(self): 

2309 """Test how queries handle skipping of calibration collections.""" 

2310 registry = self.makeRegistry() 

2311 self.loadData(registry, "base.yaml") 

2312 self.loadData(registry, "datasets.yaml") 

2313 

2314 coll_calib = "Cam1/calibs/default" 

2315 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2316 

2317 # Add all biases to the calibration collection. 

2318 # Without this, the logic that prunes dataset subqueries based on 

2319 # datasetType-collection summary information will fire before the logic 

2320 # we want to test below. This is a good thing (it avoids the dreaded 

2321 # NotImplementedError a bit more often) everywhere but here. 

2322 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2323 

2324 coll_list = [coll_calib, "imported_g", "imported_r"] 

2325 chain = "Cam1/chain" 

2326 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2327 registry.setCollectionChain(chain, coll_list) 

2328 

2329 # explicit list will raise if findFirst=True or there are temporal 

2330 # dimensions 

2331 with self.assertRaises(NotImplementedError): 

2332 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2333 with self.assertRaises(NotImplementedError): 

2334 registry.queryDataIds( 

2335 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2336 ).count() 

2337 

2338 # chain will skip 

2339 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2340 self.assertGreater(len(datasets), 0) 

2341 

2342 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2343 self.assertGreater(len(dataIds), 0) 

2344 

2345 # glob will skip too 

2346 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2347 self.assertGreater(len(datasets), 0) 

2348 

2349 # regular expression will skip too 

2350 pattern = re.compile(".*") 

2351 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2352 self.assertGreater(len(datasets), 0) 

2353 

2354 # ellipsis should work as usual 

2355 datasets = list(registry.queryDatasets("bias", collections=...)) 

2356 self.assertGreater(len(datasets), 0) 

2357 

2358 # a few tests with findFirst 

2359 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2360 self.assertGreater(len(datasets), 0) 

2361 
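# Illustrative sketch (hypothetical, not registry internals) of the skip
# rule tested above: when the collections to search come from a chain or
# a pattern, a find-first search drops CALIBRATION collections (they can
# hold several datasets per data ID, one per validity range, so they
# cannot answer a find-first lookup without a timespan), while an
# explicit list raises instead.
def _sketch_skip_calibs(collections: list[str], types: dict[str, CollectionType]) -> list[str]:
    # Keep only collections a find-first search can use.
    return [name for name in collections if types[name] is not CollectionType.CALIBRATION]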

2362 def testIngestTimeQuery(self): 

2363 registry = self.makeRegistry() 

2364 self.loadData(registry, "base.yaml") 

2365 dt0 = datetime.utcnow() 

2366 self.loadData(registry, "datasets.yaml") 

2367 dt1 = datetime.utcnow() 

2368 

2369 datasets = list(registry.queryDatasets(..., collections=...)) 

2370 len0 = len(datasets) 

2371 self.assertGreater(len0, 0) 

2372 

2373 where = "ingest_date > T'2000-01-01'" 

2374 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2375 len1 = len(datasets) 

2376 self.assertEqual(len0, len1) 

2377 

2378 # no one will ever use this piece of software in 30 years 

2379 where = "ingest_date > T'2050-01-01'" 

2380 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2381 len2 = len(datasets) 

2382 self.assertEqual(len2, 0) 

2383 

2384 # Check more exact timing to make sure there is no 37 seconds offset 

2385 # (after fixing DM-30124). SQLite time precision is 1 second, so make 

2386 # sure that we don't test with higher precision. 

2387 tests = [ 

2388 # format: (timestamp, operator, expected_len) 

2389 (dt0 - timedelta(seconds=1), ">", len0), 

2390 (dt0 - timedelta(seconds=1), "<", 0), 

2391 (dt1 + timedelta(seconds=1), "<", len0), 

2392 (dt1 + timedelta(seconds=1), ">", 0), 

2393 ] 

2394 for dt, op, expect_len in tests: 

2395 dt_str = dt.isoformat(sep=" ") 

2396 

2397 where = f"ingest_date {op} T'{dt_str}'" 

2398 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2399 self.assertEqual(len(datasets), expect_len) 

2400 

2401 # same with bind using datetime or astropy Time 

2402 where = f"ingest_date {op} ingest_time" 

2403 datasets = list( 

2404 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2405 ) 

2406 self.assertEqual(len(datasets), expect_len) 

2407 

2408 dt_astropy = astropy.time.Time(dt, format="datetime") 

2409 datasets = list( 

2410 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2411 ) 

2412 self.assertEqual(len(datasets), expect_len) 

2413 

2414 def testTimespanQueries(self): 

2415 """Test query expressions involving timespans.""" 

2416 registry = self.makeRegistry() 

2417 self.loadData(registry, "hsc-rc2-subset.yaml") 

2418 # All visits in the database; mapping from ID to timespan. 

2419 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2420 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2421 # visit IDs are monotonically increasing). 

2422 ids = sorted(visits.keys()) 

2423 self.assertGreater(len(ids), 20) 

2424 # Pick some quasi-random indexes into `ids` to play with. 

2425 i1 = int(len(ids) * 0.1) 

2426 i2 = int(len(ids) * 0.3) 

2427 i3 = int(len(ids) * 0.6) 

2428 i4 = int(len(ids) * 0.8) 

2429 # Extract some times from those: just before the beginning of i1 (which 

2430 # should be after the end of the previous visit), exactly the 

2431 # beginning of i2, just after the beginning of i3 (and before its end), 

2432 # and the exact end of i4. 

2433 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2434 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2435 t2 = visits[ids[i2]].begin 

2436 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2437 self.assertLess(t3, visits[ids[i3]].end) 

2438 t4 = visits[ids[i4]].end 

2439 # Make sure those are actually in order. 

2440 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2441 

2442 bind = { 

2443 "t1": t1, 

2444 "t2": t2, 

2445 "t3": t3, 

2446 "t4": t4, 

2447 "ts23": Timespan(t2, t3), 

2448 } 

2449 

2450 def query(where): 

2451 """Return results as a sorted, deduplicated list of visit IDs.""" 

2452 return sorted( 

2453 { 

2454 dataId["visit"] 

2455 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2456 } 

2457 ) 

2458 

2459 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2460 # where they appear in the expression, and how we get the timespan into 

2461 # the expression. 

2462 

2463 # t1 is before the start of i1, so this should not include i1. 

2464 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2465 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2466 # should not include i2. 

2467 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2468 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2469 # t3 is in the middle of i3, so this should include i3. 

2470 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2471 # This one should not include i3, by the same reasoning.

2472 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2473 # t4 is exactly at the end of i4, so this should include i4. 

2474 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2475 # i4's upper bound of t4 is exclusive, so this should not include i4.

2476 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2477 

2478 # Now some timespan vs. time scalar queries. 

2479 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2480 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2481 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2482 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2483 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2484 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2485 

2486 # Empty timespans should not overlap anything. 

2487 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 

2488 
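# A minimal standalone sketch of the half-open interval convention the
# assertions above rely on (begin-inclusive, end-exclusive), assuming
# daf_butler's Timespan.contains/overlaps predicates match the semantics of
# the 'where' expressions exercised here.
import astropy.time

from lsst.daf.butler import Timespan

begin = astropy.time.Time("2020-01-01T00:00:00", scale="tai")
end = astropy.time.Time("2020-01-01T01:00:00", scale="tai")
ts = Timespan(begin, end)
assert ts.contains(begin)  # Lower bound is inclusive.
assert not ts.contains(end)  # Upper bound is exclusive.
assert not ts.overlaps(Timespan(end, None))  # Adjacent spans do not overlap.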

2489 def testCollectionSummaries(self): 

2490 """Test recording and retrieval of collection summaries.""" 

2491 self.maxDiff = None 

2492 registry = self.makeRegistry() 

2493 # Importing datasets from yaml should go through the code path where 

2494 # we update collection summaries as we insert datasets. 

2495 self.loadData(registry, "base.yaml") 

2496 self.loadData(registry, "datasets.yaml") 

2497 flat = registry.getDatasetType("flat") 

2498 expected1 = CollectionSummary() 

2499 expected1.dataset_types.add(registry.getDatasetType("bias")) 

2500 expected1.add_data_ids( 

2501 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)] 

2502 ) 

2503 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2504 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2505 # Create a chained collection with both of the imported runs; the 

2506 # summary should be the same, because it's a union with itself. 

2507 chain = "chain" 

2508 registry.registerCollection(chain, CollectionType.CHAINED) 

2509 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2510 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2511 # Associate flats only into a tagged collection and a calibration 

2512 # collection to check summaries of those. 

2513 tag = "tag" 

2514 registry.registerCollection(tag, CollectionType.TAGGED) 

2515 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2516 calibs = "calibs" 

2517 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2518 registry.certify( 

2519 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2520 ) 

2521 expected2 = expected1.copy() 

2522 expected2.dataset_types.discard("bias") 

2523 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2524 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2525 # Explicitly calling Registry.refresh() should load those same 

2526 # summaries, via a totally different code path. 

2527 registry.refresh() 

2528 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2529 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2530 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2531 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2532 

2533 def testBindInQueryDatasets(self): 

2534 """Test that the bind parameter is correctly forwarded in 

2535 queryDatasets recursion. 

2536 """ 

2537 registry = self.makeRegistry() 

2538 # Load base data and datasets so that there is something to query

2539 # against.

2540 self.loadData(registry, "base.yaml") 

2541 self.loadData(registry, "datasets.yaml") 

2542 self.assertEqual( 

2543 set(registry.queryDatasets("flat", band="r", collections=...)), 

2544 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2545 ) 

2546 

2547 def testQueryIntRangeExpressions(self): 

2548 """Test integer range expressions in ``where`` arguments. 

2549 

2550 Note that our expressions use inclusive stop values, unlike Python's. 

2551 """ 

2552 registry = self.makeRegistry() 

2553 self.loadData(registry, "base.yaml") 

2554 self.assertEqual( 

2555 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")), 

2556 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]}, 

2557 ) 

2558 self.assertEqual( 

2559 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")), 

2560 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]}, 

2561 ) 

2562 self.assertEqual( 

2563 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")), 

2564 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]}, 

2565 ) 

2566 
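# A tiny sketch of the range-literal semantics checked above: a hypothetical
# helper (not part of the butler API) mirroring 'start..stop:stride', which
# behaves like Python's range() but with an inclusive stop.
def inclusive_range(start: int, stop: int, stride: int = 1) -> list[int]:
    return list(range(start, stop + 1, stride))

assert inclusive_range(1, 2) == [1, 2]  # detector IN (1..2)
assert inclusive_range(1, 4, 2) == [1, 3]  # detector IN (1..4:2)
assert inclusive_range(2, 4, 2) == [2, 4]  # detector IN (2..4:2)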

2567 def testQueryResultSummaries(self): 

2568 """Test summary methods like `count`, `any`, and `explain_no_results` 

2569 on `DataCoordinateQueryResults` and `DatasetQueryResults`. 

2570 """ 

2571 registry = self.makeRegistry() 

2572 self.loadData(registry, "base.yaml") 

2573 self.loadData(registry, "datasets.yaml") 

2574 self.loadData(registry, "spatial.yaml") 

2575 # Default test dataset has two collections, each with both flats and 

2576 # biases. Add a new collection with only biases. 

2577 registry.registerCollection("biases", CollectionType.TAGGED) 

2578 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2579 # First query yields two results, and involves no postprocessing. 

2580 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2581 self.assertTrue(query1.any(execute=False, exact=False)) 

2582 self.assertTrue(query1.any(execute=True, exact=False)) 

2583 self.assertTrue(query1.any(execute=True, exact=True)) 

2584 self.assertEqual(query1.count(exact=False), 2) 

2585 self.assertEqual(query1.count(exact=True), 2) 

2586 self.assertFalse(list(query1.explain_no_results())) 

2587 # Second query should yield no results, which we should see when 

2588 # we attempt to expand the data ID. 

2589 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2590 # There's no execute=False, exact=False test here because the behavior

2591 # is not something we want to guarantee in this case (and exact=False

2592 # says either answer is legal).

2593 self.assertFalse(query2.any(execute=True, exact=False)) 

2594 self.assertFalse(query2.any(execute=True, exact=True)) 

2595 self.assertEqual(query2.count(exact=False), 0) 

2596 self.assertEqual(query2.count(exact=True), 0) 

2597 self.assertTrue(list(query2.explain_no_results())) 

2598 # These queries yield no results due to various problems that can be 

2599 # spotted prior to execution, yielding helpful diagnostics. 

2600 base_query = registry.queryDataIds(["detector", "physical_filter"]) 

2601 queries_and_snippets = [ 

2602 ( 

2603 # Dataset type name doesn't match any existing dataset types. 

2604 registry.queryDatasets("nonexistent", collections=...), 

2605 ["nonexistent"], 

2606 ), 

2607 ( 

2608 # Dataset type object isn't registered. 

2609 registry.queryDatasets( 

2610 DatasetType( 

2611 "nonexistent", 

2612 dimensions=["instrument"], 

2613 universe=registry.dimensions, 

2614 storageClass="Image", 

2615 ), 

2616 collections=..., 

2617 ), 

2618 ["nonexistent"], 

2619 ), 

2620 ( 

2621 # No datasets of this type in this collection. 

2622 registry.queryDatasets("flat", collections=["biases"]), 

2623 ["flat", "biases"], 

2624 ), 

2625 ( 

2626 # No datasets of this type in this collection. 

2627 base_query.findDatasets("flat", collections=["biases"]), 

2628 ["flat", "biases"], 

2629 ), 

2630 ( 

2631 # No collections matching at all. 

2632 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2633 ["potato"], 

2634 ), 

2635 ] 

2636 # The behavior of these additional queries is slated to change in the 

2637 # future, so we also check for deprecation warnings. 

2638 with self.assertWarns(FutureWarning): 

2639 queries_and_snippets.append( 

2640 ( 

2641 # Dataset type name doesn't match any existing dataset 

2642 # types. 

2643 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...), 

2644 ["nonexistent"], 

2645 ) 

2646 ) 

2647 with self.assertWarns(FutureWarning): 

2648 queries_and_snippets.append( 

2649 ( 

2650 # Dataset type name doesn't match any existing dataset 

2651 # types. 

2652 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...), 

2653 ["nonexistent"], 

2654 ) 

2655 ) 

2656 for query, snippets in queries_and_snippets: 

2657 self.assertFalse(query.any(execute=False, exact=False)) 

2658 self.assertFalse(query.any(execute=True, exact=False)) 

2659 self.assertFalse(query.any(execute=True, exact=True)) 

2660 self.assertEqual(query.count(exact=False), 0) 

2661 self.assertEqual(query.count(exact=True), 0) 

2662 messages = list(query.explain_no_results()) 

2663 self.assertTrue(messages) 

2664 # Want all expected snippets to appear in at least one message. 

2665 self.assertTrue( 

2666 any( 

2667 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2668 ), 

2669 messages, 

2670 ) 

2671 

2672 # This query does yield results, but should also emit a warning because

2673 # passing dataset type patterns to queryDataIds is deprecated; just look

2674 # for the warning.

2675 with self.assertWarns(FutureWarning): 

2676 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...) 

2677 

2678 # These queries yield no results due to problems that can be identified 

2679 # by cheap follow-up queries, yielding helpful diagnostics. 

2680 for query, snippets in [ 

2681 ( 

2682 # No records for one of the involved dimensions. 

2683 registry.queryDataIds(["subfilter"]), 

2684 ["no rows", "subfilter"], 

2685 ), 

2686 ( 

2687 # No records for one of the involved dimensions. 

2688 registry.queryDimensionRecords("subfilter"), 

2689 ["no rows", "subfilter"], 

2690 ), 

2691 ]: 

2692 self.assertFalse(query.any(execute=True, exact=False)) 

2693 self.assertFalse(query.any(execute=True, exact=True)) 

2694 self.assertEqual(query.count(exact=True), 0) 

2695 messages = list(query.explain_no_results()) 

2696 self.assertTrue(messages) 

2697 # Want all expected snippets to appear in at least one message. 

2698 self.assertTrue( 

2699 any( 

2700 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2701 ), 

2702 messages, 

2703 ) 

2704 

2705 # This query yields four overlaps in the database, but one is filtered 

2706 # out in postprocessing. The count queries aren't accurate because 

2707 # they don't account for duplication that happens due to an internal 

2708 # join against commonSkyPix. 

2709 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2710 self.assertEqual( 

2711 { 

2712 DataCoordinate.standardize( 

2713 instrument="Cam1", 

2714 skymap="SkyMap1", 

2715 visit=v, 

2716 tract=t, 

2717 universe=registry.dimensions, 

2718 ) 

2719 for v, t in [(1, 0), (2, 0), (2, 1)] 

2720 }, 

2721 set(query3), 

2722 ) 

2723 self.assertTrue(query3.any(execute=False, exact=False)) 

2724 self.assertTrue(query3.any(execute=True, exact=False)) 

2725 self.assertTrue(query3.any(execute=True, exact=True)) 

2726 self.assertGreaterEqual(query3.count(exact=False), 4) 

2727 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3) 

2728 self.assertFalse(list(query3.explain_no_results())) 

2729 # This query yields overlaps in the database, but all are filtered 

2730 # out in postprocessing. The count queries again aren't very useful. 

2731 # We have to use `where=` here to avoid an optimization that 

2732 # (currently) skips the spatial postprocess-filtering because it 

2733 # recognizes that no spatial join is necessary. That's not ideal, but 

2734 # fixing it is out of scope for this ticket. 

2735 query4 = registry.queryDataIds( 

2736 ["visit", "tract"], 

2737 instrument="Cam1", 

2738 skymap="SkyMap1", 

2739 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2740 ) 

2741 self.assertFalse(set(query4)) 

2742 self.assertTrue(query4.any(execute=False, exact=False)) 

2743 self.assertTrue(query4.any(execute=True, exact=False)) 

2744 self.assertFalse(query4.any(execute=True, exact=True)) 

2745 self.assertGreaterEqual(query4.count(exact=False), 1) 

2746 self.assertEqual(query4.count(exact=True, discard=True), 0) 

2747 messages = list(query4.explain_no_results())

2748 self.assertTrue(messages) 

2749 self.assertTrue(any("overlap" in message for message in messages)) 

2750 # This query should yield results from one dataset type but not the 

2751 # other, which is not registered. 

2752 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"]) 

2753 self.assertTrue(set(query5)) 

2754 self.assertTrue(query5.any(execute=False, exact=False)) 

2755 self.assertTrue(query5.any(execute=True, exact=False)) 

2756 self.assertTrue(query5.any(execute=True, exact=True)) 

2757 self.assertGreaterEqual(query5.count(exact=False), 1) 

2758 self.assertGreaterEqual(query5.count(exact=True), 1) 

2759 self.assertFalse(list(query5.explain_no_results())) 

2760 # This query applies a selection that yields no results, fully in the 

2761 # database. Explaining why it fails involves traversing the relation 

2762 # tree and running a LIMIT 1 query at each level that has the potential 

2763 # to remove rows. 

2764 query6 = registry.queryDimensionRecords( 

2765 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1" 

2766 ) 

2767 self.assertEqual(query6.count(exact=True), 0) 

2768 messages = list(query6.explain_no_results())

2769 self.assertTrue(messages) 

2770 self.assertTrue(any("no-purpose" in message for message in messages)) 

2771 

2772 def testQueryDataIdsExpressionError(self): 

2773 """Test error checking of 'where' expressions in queryDataIds.""" 

2774 registry = self.makeRegistry() 

2775 self.loadData(registry, "base.yaml") 

2776 bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")} 

2777 with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."): 

2778 registry.queryDataIds(["detector"], where="foo.bar = 12") 

2779 with self.assertRaisesRegex( 

2780 LookupError, "Dimension element name cannot be inferred in this context." 

2781 ): 

2782 registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind) 

2783 

2784 def testQueryDataIdsOrderBy(self): 

2785 """Test order_by and limit on result returned by queryDataIds().""" 

2786 registry = self.makeRegistry() 

2787 self.loadData(registry, "base.yaml") 

2788 self.loadData(registry, "datasets.yaml") 

2789 self.loadData(registry, "spatial.yaml") 

2790 

2791 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

2792 return registry.queryDataIds( 

2793 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

2794 ) 

2795 

2796 Test = namedtuple( 

2797 "testQueryDataIdsOrderByTest", 

2798 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

2799 defaults=(None, None, None), 

2800 ) 

2801 
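# Reminder of the stdlib behavior relied on above: namedtuple defaults align
# with the rightmost fields, so only 'limit', 'datasets', and 'collections'
# are optional here. A quick standalone check:
from collections import namedtuple

_T = namedtuple("_T", ("a", "b", "c"), defaults=(None,))
assert _T(1, 2) == _T(1, 2, None)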

2802 test_data = ( 

2803 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2804 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

2805 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

2806 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

2807 Test( 

2808 "tract.id,visit.id", 

2809 "tract,visit", 

2810 ((0, 1), (0, 1), (0, 2)), 

2811 limit=(3,), 

2812 ), 

2813 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

2814 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

2815 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

2816 Test( 

2817 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

2818 ), 

2819 Test( 

2820 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

2821 ), 

2822 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2823 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2824 Test( 

2825 "tract,-timespan.begin,timespan.end", 

2826 "tract,visit", 

2827 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

2828 ), 

2829 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

2830 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

2831 Test( 

2832 "tract,detector", 

2833 "tract,detector", 

2834 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2835 datasets="flat", 

2836 collections="imported_r", 

2837 ), 

2838 Test( 

2839 "tract,detector.full_name", 

2840 "tract,detector", 

2841 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2842 datasets="flat", 

2843 collections="imported_r", 

2844 ), 

2845 Test( 

2846 "tract,detector.raft,detector.name_in_raft", 

2847 "tract,detector", 

2848 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2849 datasets="flat", 

2850 collections="imported_r", 

2851 ), 

2852 ) 

2853 

2854 for test in test_data: 

2855 order_by = test.order_by.split(",") 

2856 keys = test.keys.split(",") 

2857 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

2858 if test.limit is not None: 

2859 query = query.limit(*test.limit) 

2860 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

2861 self.assertEqual(dataIds, test.result) 

2862 

2863 # and materialize 

2864 query = do_query(keys).order_by(*order_by) 

2865 if test.limit is not None: 

2866 query = query.limit(*test.limit) 

2867 with self.assertRaises(RelationalAlgebraError): 

2868 with query.materialize(): 

2869 pass 

2870 

2871 # errors in a name 

2872 for order_by in ("", "-"): 

2873 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2874 list(do_query().order_by(order_by)) 

2875 

2876 for order_by in ("undimension.name", "-undimension.name"): 

2877 with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"): 

2878 list(do_query().order_by(order_by)) 

2879 

2880 for order_by in ("attract", "-attract"): 

2881 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

2882 list(do_query().order_by(order_by)) 

2883 

2884 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

2885 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

2886 

2887 with self.assertRaisesRegex( 

2888 ValueError, 

2889 r"Timespan exists in more than one dimension element \(exposure, visit\); " 

2890 r"qualify timespan with specific dimension name\.", 

2891 ): 

2892 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

2893 

2894 with self.assertRaisesRegex( 

2895 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

2896 ): 

2897 list(do_query("tract").order_by("timespan.begin")) 

2898 

2899 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

2900 list(do_query("tract").order_by("tract.timespan.begin")) 

2901 

2902 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

2903 list(do_query("tract").order_by("tract.name")) 

2904 

2905 with self.assertRaisesRegex( 

2906 ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?" 

2907 ): 

2908 list(do_query("visit").order_by("timestamp.begin")) 

2909 

2910 def testQueryDataIdsGovernorExceptions(self): 

2911 """Test exceptions raised by queryDataIds() for incorrect governors.""" 

2912 registry = self.makeRegistry() 

2913 self.loadData(registry, "base.yaml") 

2914 self.loadData(registry, "datasets.yaml") 

2915 self.loadData(registry, "spatial.yaml") 

2916 

2917 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs): 

2918 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs) 

2919 

2920 Test = namedtuple( 

2921 "testQueryDataIdExceptionsTest", 

2922 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"), 

2923 defaults=(None, None, None, {}, None, 0), 

2924 ) 

2925 

2926 test_data = ( 

2927 Test("tract,visit", count=6), 

2928 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2929 Test( 

2930 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError 

2931 ), 

2932 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2933 Test( 

2934 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError 

2935 ), 

2936 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6), 

2937 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError), 

2938 Test( 

2939 "tract,visit", 

2940 where="instrument=cam AND skymap=map", 

2941 bind={"cam": "Cam1", "map": "SkyMap1"}, 

2942 count=6, 

2943 ), 

2944 Test( 

2945 "tract,visit", 

2946 where="instrument=cam AND skymap=map", 

2947 bind={"cam": "Cam", "map": "SkyMap"}, 

2948 exception=DataIdValueError, 

2949 ), 

2950 ) 

2951 

2952 for test in test_data: 

2953 dimensions = test.dimensions.split(",") 

2954 if test.exception: 

2955 with self.assertRaises(test.exception): 

2956 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() 

2957 else: 

2958 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2959 self.assertEqual(query.count(discard=True), test.count) 

2960 

2961 # and materialize 

2962 if test.exception: 

2963 with self.assertRaises(test.exception): 

2964 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2965 with query.materialize() as materialized: 

2966 materialized.count(discard=True) 

2967 else: 

2968 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2969 with query.materialize() as materialized: 

2970 self.assertEqual(materialized.count(discard=True), test.count) 

2971 

2972 def testQueryDimensionRecordsOrderBy(self): 

2973 """Test order_by and limit on result returned by 

2974 queryDimensionRecords(). 

2975 """ 

2976 registry = self.makeRegistry() 

2977 self.loadData(registry, "base.yaml") 

2978 self.loadData(registry, "datasets.yaml") 

2979 self.loadData(registry, "spatial.yaml") 

2980 

2981 def do_query(element, datasets=None, collections=None): 

2982 return registry.queryDimensionRecords( 

2983 element, instrument="Cam1", datasets=datasets, collections=collections 

2984 ) 

2985 

2986 query = do_query("detector") 

2987 self.assertEqual(len(list(query)), 4) 

2988 

2989 Test = namedtuple( 

2990 "testQueryDataIdsOrderByTest", 

2991 ("element", "order_by", "result", "limit", "datasets", "collections"), 

2992 defaults=(None, None, None), 

2993 ) 

2994 

2995 test_data = ( 

2996 Test("detector", "detector", (1, 2, 3, 4)), 

2997 Test("detector", "-detector", (4, 3, 2, 1)), 

2998 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)), 

2999 Test("detector", "-detector.purpose", (4,), limit=(1,)), 

3000 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)), 

3001 Test("visit", "visit", (1, 2)), 

3002 Test("visit", "-visit.id", (2, 1)), 

3003 Test("visit", "zenith_angle", (1, 2)), 

3004 Test("visit", "-visit.name", (2, 1)), 

3005 Test("visit", "day_obs,-timespan.begin", (2, 1)), 

3006 ) 

3007 

3008 for test in test_data: 

3009 order_by = test.order_by.split(",") 

3010 query = do_query(test.element).order_by(*order_by) 

3011 if test.limit is not None: 

3012 query = query.limit(*test.limit) 

3013 dataIds = tuple(rec.id for rec in query) 

3014 self.assertEqual(dataIds, test.result) 

3015 

3016 # errors in a name 

3017 for order_by in ("", "-"): 

3018 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

3019 list(do_query("detector").order_by(order_by)) 

3020 

3021 for order_by in ("undimension.name", "-undimension.name"): 

3022 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"): 

3023 list(do_query("detector").order_by(order_by)) 

3024 

3025 for order_by in ("attract", "-attract"): 

3026 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."): 

3027 list(do_query("detector").order_by(order_by)) 

3028 

3029 for order_by in ("timestamp.begin", "-timestamp.begin"): 

3030 with self.assertRaisesRegex( 

3031 ValueError, 

3032 r"Element name mismatch: 'timestamp' instead of 'visit'; " 

3033 r"perhaps you meant 'timespan.begin'\?", 

3034 ): 

3035 list(do_query("visit").order_by(order_by)) 

3036 

3037 def testQueryDimensionRecordsExceptions(self): 

3038 """Test exceptions raised by queryDimensionRecords().""" 

3039 registry = self.makeRegistry() 

3040 self.loadData(registry, "base.yaml") 

3041 self.loadData(registry, "datasets.yaml") 

3042 self.loadData(registry, "spatial.yaml") 

3043 

3044 result = registry.queryDimensionRecords("detector") 

3045 self.assertEqual(result.count(), 4) 

3046 result = registry.queryDimensionRecords("detector", instrument="Cam1") 

3047 self.assertEqual(result.count(), 4) 

3048 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"}) 

3049 self.assertEqual(result.count(), 4) 

3050 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'") 

3051 self.assertEqual(result.count(), 4) 

3052 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"}) 

3053 self.assertEqual(result.count(), 4) 

3054 

3055 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3056 result = registry.queryDimensionRecords("detector", instrument="NotCam1") 

3057 result.count() 

3058 

3059 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3060 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) 

3061 result.count() 

3062 

3063 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3064 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") 

3065 result.count() 

3066 

3067 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3068 result = registry.queryDimensionRecords( 

3069 "detector", where="instrument=instr", bind={"instr": "NotCam1"} 

3070 ) 

3071 result.count() 

3072 

3073 def testDatasetConstrainedDimensionRecordQueries(self): 

3074 """Test that queryDimensionRecords works even when given a dataset 

3075 constraint whose dimensions extend beyond the requested dimension 

3076 element's. 

3077 """ 

3078 registry = self.makeRegistry() 

3079 self.loadData(registry, "base.yaml") 

3080 self.loadData(registry, "datasets.yaml") 

3081 # Query for physical_filter dimension records, using a dataset type that

3082 # has both physical_filter and detector dimensions.

3083 records = registry.queryDimensionRecords( 

3084 "physical_filter", 

3085 datasets=["flat"], 

3086 collections="imported_r", 

3087 ) 

3088 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"}) 

3089 # Trying to constrain by all dataset types is an error. 

3090 with self.assertRaises(TypeError): 

3091 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r")) 

3092 

3093 def testSkyPixDatasetQueries(self): 

3094 """Test that we can build queries involving skypix dimensions as long 

3095 as a dataset type that uses those dimensions is included. 

3096 """ 

3097 registry = self.makeRegistry() 

3098 self.loadData(registry, "base.yaml") 

3099 dataset_type = DatasetType( 

3100 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int" 

3101 ) 

3102 registry.registerDatasetType(dataset_type) 

3103 run = "r" 

3104 registry.registerRun(run) 

3105 # First try queries where there are no datasets; the concern is whether 

3106 # we can even build and execute these queries without raising, even 

3107 # when "doomed" query shortcuts are in play. 

3108 self.assertFalse( 

3109 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)) 

3110 ) 

3111 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run))) 

3112 # Now add a dataset and see that we can get it back. 

3113 htm7 = registry.dimensions.skypix["htm"][7].pixelization 

3114 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0]) 

3115 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run) 

3116 self.assertEqual( 

3117 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)), 

3118 {data_id}, 

3119 ) 

3120 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref}) 

3121 

3122 def testDatasetIdFactory(self): 

3123 """Simple test for DatasetIdFactory, mostly to catch potential changes 

3124 in its API. 

3125 """ 

3126 registry = self.makeRegistry() 

3127 factory = DatasetIdFactory() 

3128 dataset_type = DatasetType( 

3129 "datasetType", 

3130 dimensions=["detector", "instrument"], 

3131 universe=registry.dimensions, 

3132 storageClass="int", 

3133 ) 

3134 run = "run" 

3135 data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions) 

3136 

3137 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE) 

3138 self.assertIsInstance(datasetId, uuid.UUID) 

3139 self.assertEqual(datasetId.version, 4) 

3140 

3141 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE) 

3142 self.assertIsInstance(datasetId, uuid.UUID) 

3143 self.assertEqual(datasetId.version, 5) 

3144 

3145 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN) 

3146 self.assertIsInstance(datasetId, uuid.UUID) 

3147 self.assertEqual(datasetId.version, 5) 

3148 
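# A stdlib-only sketch of why the versions above are 4 and 5: UNIQUE mode
# produces random UUIDs, while the DATAID_TYPE* modes produce deterministic,
# name-based UUIDs that are reproducible from the same inputs.
import uuid

assert uuid.uuid4().version == 4  # Random.
assert uuid.uuid5(uuid.NAMESPACE_DNS, "example.org").version == 5  # Name-based.
assert uuid.uuid5(uuid.NAMESPACE_DNS, "x") == uuid.uuid5(uuid.NAMESPACE_DNS, "x")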

3149 def testExposureQueries(self): 

3150 """Test query methods using arguments sourced from the exposure log 

3151 service. 

3152 

3153 The most complete test dataset currently available to daf_butler tests 

3154 is the hsc-rc2-subset.yaml export (which is unfortunately distinct from

3155 the lsst/rc2_subset GitHub repo), but that does not have 'exposure'

3156 dimension records as it was focused on providing nontrivial spatial 

3157 overlaps between visit+detector and tract+patch. So in this test we 

3158 need to translate queries that originally used the exposure dimension 

3159 to use the (very similar) visit dimension instead. 

3160 """ 

3161 registry = self.makeRegistry() 

3162 self.loadData(registry, "hsc-rc2-subset.yaml") 

3163 self.assertEqual( 

3164 [ 

3165 record.id 

3166 for record in registry.queryDimensionRecords("visit", instrument="HSC") 

3167 .order_by("id") 

3168 .limit(5) 

3169 ], 

3170 [318, 322, 326, 330, 332], 

3171 ) 

3172 self.assertEqual( 

3173 [ 

3174 data_id["visit"] 

3175 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("id").limit(5) 

3176 ], 

3177 [318, 322, 326, 330, 332], 

3178 ) 

3179 self.assertEqual( 

3180 [ 

3181 record.id 

3182 for record in registry.queryDimensionRecords("detector", instrument="HSC") 

3183 .order_by("full_name") 

3184 .limit(5) 

3185 ], 

3186 [73, 72, 71, 70, 65], 

3187 ) 

3188 self.assertEqual( 

3189 [ 

3190 data_id["detector"] 

3191 for data_id in registry.queryDataIds(["detector"], instrument="HSC") 

3192 .order_by("full_name") 

3193 .limit(5) 

3194 ], 

3195 [73, 72, 71, 70, 65], 

3196 ) 

3197 

3198 def test_long_query_names(self) -> None: 

3199 """Test that queries involving very long names are handled correctly. 

3200 

3201 This is especially important for PostgreSQL, which truncates identifiers

3202 longer than 63 bytes, but it's worth testing for all DBs.

3203 """ 

3204 registry = self.makeRegistry() 

3205 name = "abcd" * 17 

3206 registry.registerDatasetType( 

3207 DatasetType( 

3208 name, 

3209 dimensions=(), 

3210 storageClass="Exposure", 

3211 universe=registry.dimensions, 

3212 ) 

3213 ) 

3214 # Need to search more than one collection actually containing a 

3215 # matching dataset to avoid optimizations that sidestep bugs due to 

3216 # truncation by making findFirst=True a no-op. 

3217 run1 = "run1" 

3218 registry.registerRun(run1) 

3219 run2 = "run2" 

3220 registry.registerRun(run2) 

3221 (ref1,) = registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run1) 

3222 registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run2) 

3223 self.assertEqual( 

3224 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)), 

3225 {ref1}, 

3226 ) 

3227 

3228 def test_skypix_constraint_queries(self) -> None: 

3229 """Test queries spatially constrained by a skypix data ID.""" 

3230 registry = self.makeRegistry() 

3231 self.loadData(registry, "hsc-rc2-subset.yaml") 

3232 patch_regions = { 

3233 (data_id["tract"], data_id["patch"]): data_id.region 

3234 for data_id in registry.queryDataIds(["patch"]).expanded() 

3235 } 

3236 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"] 

3237 # This check ensures the test doesn't become trivial due to a config 

3238 # change; if it does, just pick a different HTM level.

3239 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix) 

3240 # Gather all skypix IDs that definitely overlap at least one of these 

3241 # patches. 

3242 relevant_skypix_ids = lsst.sphgeom.RangeSet() 

3243 for patch_region in patch_regions.values(): 

3244 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region) 

3245 # Look for a "nontrivial" skypix_id that overlaps at least one patch 

3246 # and does not overlap at least one other patch. 

3247 for skypix_id in itertools.chain.from_iterable( 

3248 range(begin, end) for begin, end in relevant_skypix_ids 

3249 ): 

3250 skypix_region = skypix_dimension.pixelization.pixel(skypix_id) 

3251 overlapping_patches = { 

3252 patch_key 

3253 for patch_key, patch_region in patch_regions.items() 

3254 if not patch_region.isDisjointFrom(skypix_region) 

3255 } 

3256 if overlapping_patches and overlapping_patches != patch_regions.keys(): 

3257 break 

3258 else: 

3259 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.") 

3260 self.assertEqual( 

3261 { 

3262 (data_id["tract"], data_id["patch"]) 

3263 for data_id in registry.queryDataIds( 

3264 ["patch"], 

3265 dataId={skypix_dimension.name: skypix_id}, 

3266 ) 

3267 }, 

3268 overlapping_patches, 

3269 ) 

3270 # Test that a three-way join that includes the common skypix system in 

3271 # the dimensions doesn't generate redundant join terms in the query. 

3272 full_data_ids = set( 

3273 registry.queryDataIds( 

3274 ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC" 

3275 ).expanded() 

3276 ) 

3277 self.assertGreater(len(full_data_ids), 0) 

3278 for data_id in full_data_ids: 

3279 self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region)) 

3280 self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region)) 

3281 
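# A compact standalone sketch of the sphgeom pieces used above: an HTM
# pixelization maps sky positions to integer pixel IDs, a RangeSet iterates
# as half-open (begin, end) ID ranges, and each pixel ID maps back to a
# spherical region.
import lsst.sphgeom

pixelization = lsst.sphgeom.HtmPixelization(7)  # HTM level 7, as in 'htm7'.
for begin, end in pixelization.universe():  # RangeSet of all pixel IDs.
    region = pixelization.pixel(begin)  # Region of the first pixel.
    assert not region.isDisjointFrom(region)  # A region overlaps itself.
    break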

3282 def test_spatial_constraint_queries(self) -> None: 

3283 """Test queries in which one spatial dimension in the constraint (data 

3284 ID or ``where`` string) constrains a different spatial dimension in the 

3285 query result columns. 

3286 """ 

3287 registry = self.makeRegistry() 

3288 self.loadData(registry, "hsc-rc2-subset.yaml") 

3289 patch_regions = { 

3290 (data_id["tract"], data_id["patch"]): data_id.region 

3291 for data_id in registry.queryDataIds(["patch"]).expanded() 

3292 } 

3293 observation_regions = { 

3294 (data_id["visit"], data_id["detector"]): data_id.region 

3295 for data_id in registry.queryDataIds(["visit", "detector"]).expanded() 

3296 } 

3297 all_combos = { 

3298 (patch_key, observation_key) 

3299 for patch_key, observation_key in itertools.product(patch_regions, observation_regions) 

3300 } 

3301 overlapping_combos = { 

3302 (patch_key, observation_key) 

3303 for patch_key, observation_key in all_combos 

3304 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key]) 

3305 } 

3306 # Check a direct spatial join with no constraint first. 

3307 self.assertEqual( 

3308 { 

3309 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"])) 

3310 for data_id in registry.queryDataIds(["patch", "visit", "detector"]) 

3311 }, 

3312 overlapping_combos, 

3313 ) 

3314 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set)

3315 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set)

3316 for patch_key, observation_key in overlapping_combos: 

3317 overlaps_by_patch[patch_key].add(observation_key) 

3318 overlaps_by_observation[observation_key].add(patch_key) 

3319 # Find patches and observations that each overlap at least one of the

3320 # other kind, but not all of them.

3321 nontrivial_patch = next( 

3322 iter( 

3323 patch_key 

3324 for patch_key, observation_keys in overlaps_by_patch.items() 

3325 if observation_keys and observation_keys != observation_regions.keys() 

3326 ) 

3327 ) 

3328 nontrivial_observation = next( 

3329 iter( 

3330 observation_key 

3331 for observation_key, patch_keys in overlaps_by_observation.items() 

3332 if patch_keys and patch_keys != patch_regions.keys() 

3333 ) 

3334 ) 

3335 # Use the nontrivial patches and observations as constraints on the 

3336 # other dimensions in various ways, first via a 'where' expression. 

3337 # It's better in general to use 'bind' instead of f-strings, but these

3338 # are all integers so there are no quoting concerns.

3339 self.assertEqual( 

3340 { 

3341 (data_id["visit"], data_id["detector"]) 

3342 for data_id in registry.queryDataIds( 

3343 ["visit", "detector"], 

3344 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}", 

3345 skymap="hsc_rings_v1", 

3346 ) 

3347 }, 

3348 overlaps_by_patch[nontrivial_patch], 

3349 ) 

3350 self.assertEqual( 

3351 { 

3352 (data_id["tract"], data_id["patch"]) 

3353 for data_id in registry.queryDataIds( 

3354 ["patch"], 

3355 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}", 

3356 instrument="HSC", 

3357 ) 

3358 }, 

3359 overlaps_by_observation[nontrivial_observation], 

3360 ) 

3361 # and then via the dataId argument. 

3362 self.assertEqual( 

3363 { 

3364 (data_id["visit"], data_id["detector"]) 

3365 for data_id in registry.queryDataIds( 

3366 ["visit", "detector"], 

3367 dataId={ 

3368 "tract": nontrivial_patch[0], 

3369 "patch": nontrivial_patch[1], 

3370 }, 

3371 skymap="hsc_rings_v1", 

3372 ) 

3373 }, 

3374 overlaps_by_patch[nontrivial_patch], 

3375 ) 

3376 self.assertEqual( 

3377 { 

3378 (data_id["tract"], data_id["patch"]) 

3379 for data_id in registry.queryDataIds( 

3380 ["patch"], 

3381 dataId={ 

3382 "visit": nontrivial_observation[0], 

3383 "detector": nontrivial_observation[1], 

3384 }, 

3385 instrument="HSC", 

3386 ) 

3387 }, 

3388 overlaps_by_observation[nontrivial_observation], 

3389 ) 

3390 

3391 def test_query_projection_drop_postprocessing(self) -> None: 

3392 """Test that projections and deduplications on query objects can 

3393 drop post-query region filtering to ensure the query remains in 

3394 the SQL engine. 

3395 """ 

3396 registry = self.makeRegistry() 

3397 self.loadData(registry, "base.yaml") 

3398 self.loadData(registry, "spatial.yaml") 

3399 

3400 def pop_transfer(tree: Relation) -> Relation: 

3401 """If a relation tree terminates with a transfer to a new engine, 

3402 return the relation prior to that transfer. If not, return the 

3403 original relation. 

3404 """ 

3405 match tree: 

3406 case Transfer(target=target): 

3407 return target 

3408 case _: 

3409 return tree 

3410 

3411 # There's no public way to get a Query object yet, so we get one from a 

3412 # DataCoordinateQueryResults private attribute. When a public API is 

3413 # available this test should use it. 

3414 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query 

3415 # We expect this query to terminate in the iteration engine originally, 

3416 # because region-filtering is necessary. 

3417 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine) 

3418 # If we deduplicate, we usually have to do that downstream of the 

3419 # filtering. That means the deduplication has to happen in the 

3420 # iteration engine. 

3421 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine) 

3422 # If we pass drop_postprocessing, we instead drop the region filtering 

3423 # so the deduplication can happen in SQL (though there might still be a

3424 # transfer to iteration at the tail of the tree that we can ignore;

3425 # that's what the pop_transfer takes care of here). 

3426 self.assertIsInstance( 

3427 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine, 

3428 sql.Engine, 

3429 ) 

3430 

3431 def test_query_find_datasets_drop_postprocessing(self) -> None: 

3432 """Test that DataCoordinateQueryResults.findDatasets avoids commutator 

3433 problems with the FindFirstDataset relation operation. 

3434 """ 

3435 # Setup: load some visit, tract, and patch records, and insert two 

3436 # datasets with dimensions {visit, patch}, with one in each of two 

3437 # RUN collections. 

3438 registry = self.makeRegistry() 

3439 self.loadData(registry, "base.yaml") 

3440 self.loadData(registry, "spatial.yaml") 

3441 storage_class = StorageClass("Warpy") 

3442 registry.storageClasses.registerStorageClass(storage_class) 

3443 dataset_type = DatasetType( 

3444 "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions 

3445 ) 

3446 registry.registerDatasetType(dataset_type) 

3447 (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1) 

3448 registry.registerRun("run1") 

3449 registry.registerRun("run2") 

3450 (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1") 

3451 (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2") 

3452 # Query for the dataset using queryDataIds(...).findDatasets(...) 

3453 # against only one of the two collections. This should work even 

3454 # though the relation returned by queryDataIds ends with 

3455 # iteration-engine region-filtering, because we can recognize before 

3456 # running the query that there is only one collection to search and

3457 # hence the (default) findFirst=True is irrelevant, and joining in the 

3458 # dataset query commutes past the iteration-engine postprocessing. 

3459 query1 = registry.queryDataIds( 

3460 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"] 

3461 ) 

3462 self.assertEqual( 

3463 set(query1.findDatasets(dataset_type.name, collections=["run1"])), 

3464 {ref1}, 

3465 ) 

3466 # Query for the dataset using queryDataIds(...).findDatasets(...) 

3467 # against both collections. This can only work if the FindFirstDataset 

3468 # operation can be commuted past the iteration-engine postprocessing into SQL.

3469 query2 = registry.queryDataIds( 

3470 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"] 

3471 ) 

3472 self.assertEqual( 

3473 set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])), 

3474 {ref2}, 

3475 ) 

3476 

3477 def test_query_empty_collections(self) -> None: 

3478 """Test for registry query methods with empty collections. The methods 

3479 should return an empty result set (or None when applicable) and provide

3480 "doomed" diagnostics. 

3481 """ 

3482 registry = self.makeRegistry() 

3483 self.loadData(registry, "base.yaml") 

3484 self.loadData(registry, "datasets.yaml") 

3485 

3486 # Tests for registry.findDataset() 

3487 with self.assertRaises(NoDefaultCollectionError): 

3488 registry.findDataset("bias", instrument="Cam1", detector=1) 

3489 self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...)) 

3490 self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[])) 

3491 

3492 # Tests for registry.queryDatasets() 

3493 with self.assertRaises(NoDefaultCollectionError): 

3494 registry.queryDatasets("bias") 

3495 self.assertTrue(list(registry.queryDatasets("bias", collections=...))) 

3496 

3497 result = registry.queryDatasets("bias", collections=[]) 

3498 self.assertEqual(len(list(result)), 0) 

3499 messages = list(result.explain_no_results()) 

3500 self.assertTrue(messages) 

3501 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3502 

3503 # Tests for registry.queryDataIds() 

3504 with self.assertRaises(NoDefaultCollectionError): 

3505 registry.queryDataIds("detector", datasets="bias") 

3506 self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...))) 

3507 

3508 result = registry.queryDataIds("detector", datasets="bias", collections=[]) 

3509 self.assertEqual(len(list(result)), 0) 

3510 messages = list(result.explain_no_results()) 

3511 self.assertTrue(messages) 

3512 self.assertTrue(any("because collection list is empty" in message for message in messages)) 

3513 

3514 # Tests for registry.queryDimensionRecords() 

3515 with self.assertRaises(NoDefaultCollectionError): 

3516 registry.queryDimensionRecords("detector", datasets="bias") 

3517 self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...))) 

3518 

3519 result = registry.queryDimensionRecords("detector", datasets="bias", collections=[]) 

3520 self.assertEqual(len(list(result)), 0) 

3521 messages = list(result.explain_no_results()) 

3522 self.assertTrue(messages) 

3523 self.assertTrue(any("because collection list is empty" in message for message in messages))