# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import RelationalAlgebraError

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    Timespan,
    ddl,
)
from .._collection_summary import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        Returned instance will be pre-configured based on the values of class
        members, and default-configured for all other parameters. Subclasses
        that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

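    # Illustrative sketch (an editor's example, not part of the original
    # suite) of what a manager override consumed by `makeRegistryConfig`
    # above might look like. The fully-qualified class path here is an
    # assumption inferred from the ``ByDimensionsDatasetRecordStorageManagerUUID``
    # name checked later in this file.
    def _exampleUUIDManagerConfig(self) -> RegistryConfig:
        """Return a config pinned to a UUID datasets manager (sketch)."""
        config = RegistryConfig()
        config["managers", "datasets"] = (
            "lsst.daf.butler.registry.datasets.byDimensions."
            "ByDimensionsDatasetRecordStorageManagerUUID"  # assumed path
        )
        return config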

    @abstractmethod
    def makeRegistry(self, share_repo_with: Optional[Registry] = None) -> Optional[Registry]:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with` is
            not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()

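    # A minimal sketch (an editor's illustration, not part of the original
    # suite) of a SQLite-backed `makeRegistry`. `Registry.createFromConfig`
    # is assumed to be the appropriate factory here; sharing an in-memory
    # SQLite repository is impossible, so `share_repo_with` yields `None`,
    # as the docstring above allows.
    def _exampleMakeSqliteRegistry(
        self, share_repo_with: Optional[Registry] = None
    ) -> Optional[Registry]:
        from .._registry import Registry  # runtime import; the module-level one is type-checking only

        if share_repo_with is not None:
            return None  # impossible with in-memory SQLite DBs
        config = self.makeRegistryConfig()
        config["db"] = "sqlite://"  # in-memory database URI (assumption)
        return Registry.createFromConfig(config)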

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test very long IN clause which exceeds sqlite limit on number of
        # parameters. SQLite says the limit is 32k but it looks like it is
        # much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than 1k batch size, first with
        # duplicates, second has matching elements in different batches (after
        # sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

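    # The long-``IN``-clause checks above rely on the registry splitting big
    # value lists into batches. A tiny pure-Python sketch of that idea (an
    # editor's illustration by analogy; the real batching happens in the
    # database layer, and the 1k batch size comes from the comment above):
    @staticmethod
    def _exampleBatchInValues(values, batch_size=1000):
        """Yield de-duplicated, sorted ``values`` in batches of at most
        ``batch_size``, mirroring how one large IN clause can be expressed
        as several smaller ones OR-ed together.
        """
        unique = sorted(set(values))
        for start in range(0, len(unique), batch_size):
            yield unique[start : start + batch_size]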

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the given
        keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, in which two have the right
        # dataset type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets-uuid.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, the
        # first one works, since the CALIBRATION collection is irrelevant after
        # the dataset is found in the first collection. But the second one
        # should raise.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        with self.assertRaises(TypeError):
            self.assertEqual(
                bias2,
                registry.findDataset(
                    "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
                ),
            )

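    # The calibration lookups above boil down to a timespan-overlap test; a
    # minimal sketch of that predicate for half-open ``[begin, end)``
    # intervals (an editor's illustration under that assumption; the real
    # check is `Timespan.overlaps` evaluated in the database):
    @staticmethod
    def _exampleTimespansOverlap(a_begin, a_end, b_begin, b_end):
        """Return `True` if ``[a_begin, a_end)`` intersects ``[b_begin, b_end)``."""
        return a_begin < b_end and b_begin < a_end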

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing same DatasetRef with different dataset ID is an error
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs, they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):

                # Use integer dataset ID to force UUID calculation in _import
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                (ref1,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)

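    # The version-5 UUIDs asserted above are deterministic, name-based hashes:
    # DATAID_TYPE folds in the dataset type and data ID, and DATAID_TYPE_RUN
    # additionally folds in the run, which is why only the latter can be
    # re-imported into a new run. A toy sketch of that scheme (an editor's
    # illustration; the placeholder namespace and serialization below are
    # assumptions, not the registry's internal ones):
    @staticmethod
    def _exampleDeterministicDatasetId(datasetTypeName, dataId, run=None):
        namespace = uuid.UUID(int=0)  # placeholder namespace (assumption)
        name = f"{datasetTypeName}:{sorted(dataId.items())}"
        if run is not None:  # DATAID_TYPE_RUN also hashes the run name
            name += f":{run}"
        return uuid.uuid5(namespace, name)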

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be included
        # when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "flat", "bias.wcs", "flat.photoCalib"},
                NamedValueSet(registry.queryDatasetTypes(components=True)).names,
            )
        # Use a pattern that can match either parent or components. Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
            )
        # This pattern matches only a component. In this case we also return
        # that component dataset type if components=None.
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
            )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
            )
        # Add a dataset type using a StorageClass that we'll then remove; check
        # that this does not affect our ability to query for dataset types
        # (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={
                "data1": registry.storageClasses.getStorageClass("StructuredDataDict"),
                "data2": registry.storageClasses.getStorageClass("StructuredDataDict"),
            },
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertWarns(FutureWarning):
            with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
                everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that. So if the next line
        # fails (i.e. "temporary.data" _is_ in everything.names), it means
        # this part of the test isn't doing anything, because the _unregister
        # call above isn't simulating the real-life case we want it to
        # simulate, in which different versions of daf_butler in entirely
        # different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp". This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        with self.assertWarns(FutureWarning):
            dataIds = registry.queryDataIds(
                ["detector"],
                datasets=["bias.wcs"],
                collections=collection,
            ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        with self.assertWarns(FutureWarning):
            childRefs2 = set(
                registry.queryDatasets(
                    "bias.wcs",
                    collections=collection,
                )
            )
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2}, {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # Search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2. That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

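    # The ordered, recursive lookup that the CHAINED assertions above depend
    # on can be pictured with a small sketch (an editor's illustration, not
    # the registry implementation): the first child to yield a dataset wins,
    # and nested chains are searched depth-first in order.
    @staticmethod
    def _exampleResolveChain(find, collection, children):
        """Return ``find(leaf)`` for the first leaf of ``collection`` that
        yields a non-`None` result, where ``children`` maps each CHAINED
        collection name to its ordered child names.
        """
        if collection in children:  # CHAINED: recurse through ordered children
            for child in children[collection]:
                result = RegistryTests._exampleResolveChain(find, child, children)
                if result is not None:
                    return result
            return None
        return find(collection)  # leaf (RUN/TAGGED/etc.): direct lookup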

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

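    # A minimal sketch of the savepoint idiom the test above exercises (an
    # editor's illustration, not part of the original suite): failures inside
    # a ``savepoint=True`` block roll back only that block, so earlier work in
    # the outer transaction survives.
    @staticmethod
    def _exampleInsertIgnoringFailures(registry, records):
        """Insert ``(element, row)`` pairs, skipping rows whose insert fails
        while keeping all other inserts in one outer transaction.
        """
        with registry.transaction():
            for element, row in records:
                try:
                    with registry.transaction(savepoint=True):
                        registry.insertDimensionData(element, row)
                except Exception:
                    pass  # only this row's savepoint is rolled back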

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions),
            )
            self.assertEqual(
                packer2.unpack(packer2.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer2.dimensions),
            )
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter: it is not in the dimensions, but it
        # is part of the full expression, so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8")))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
1230 self.assertEqual(len(rows), 3 * 4 * 1) # 3 tracts x 4 patches x 1 filter 

1231 self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5)) 

1232 self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7)) 

1233 self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",)) 

1234 

1235 # Specifying a non-existent skymap raises an exception. 

1236 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

1237 rows = registry.queryDataIds( 

1238 dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'" 

1239 ).toSet() 

1240 
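# Illustrative sketch: a minimal pure-Python model of the governor-dimension
# check exercised just above, where an unknown "skymap" value is rejected
# before any SQL runs. ``check_governor_values`` is a hypothetical helper
# that only mirrors the semantics, not the registry implementation.
def check_governor_values(known: set, requested: str) -> None:
    if requested not in known:
        raise ValueError(f"Unknown values specified for governor dimension: {requested!r}")


check_governor_values({"DummyMap"}, "DummyMap")  # known value: passes silently
try:
    check_governor_values({"DummyMap"}, "Mars")  # unknown value: rejected early
except ValueError as err:
    assert "governor dimension" in str(err)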

1241 def testSpatialJoin(self): 

1242 """Test queries that involve spatial overlap joins.""" 

1243 registry = self.makeRegistry() 

1244 self.loadData(registry, "hsc-rc2-subset.yaml") 

1245 

1246 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of 

1247 # the TopologicalFamily they belong to. We'll relate all elements in 

1248 # each family to all of the elements in each other family. 

1249 families = defaultdict(set) 

1250 # Dictionary of {element.name: {dataId: region}}. 

1251 regions = {} 

1252 for element in registry.dimensions.getDatabaseElements(): 

1253 if element.spatial is not None: 

1254 families[element.spatial.name].add(element) 

1255 regions[element.name] = { 

1256 record.dataId: record.region for record in registry.queryDimensionRecords(element) 

1257 } 

1258 

1259 # If this check fails, it's not necessarily a problem - it may just be 

1260 # a reasonable change to the default dimension definitions - but the 

1261 # test below depends on there being more than one family to do anything 

1262 # useful. 

1263 self.assertEqual(len(families), 2) 

1264 

1265 # Overlap DatabaseDimensionElements with each other. 

1266 for family1, family2 in itertools.combinations(families, 2): 

1267 for element1, element2 in itertools.product(families[family1], families[family2]): 

1268 graph = DimensionGraph.union(element1.graph, element2.graph) 

1269 # Construct expected set of overlapping data IDs via a 

1270 # brute-force comparison of the regions we've already fetched. 

1271 expected = { 

1272 DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph) 

1273 for (dataId1, region1), (dataId2, region2) in itertools.product( 

1274 regions[element1.name].items(), regions[element2.name].items() 

1275 ) 

1276 if not region1.isDisjointFrom(region2) 

1277 } 

1278 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1279 queried = set(registry.queryDataIds(graph)) 

1280 self.assertEqual(expected, queried) 

1281 

1282 # Overlap each DatabaseDimensionElement with the commonSkyPix system. 

1283 commonSkyPix = registry.dimensions.commonSkyPix 

1284 for elementName, elementRegions in regions.items(): 

1285 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph) 

1286 expected = set() 

1287 for dataId, region in elementRegions.items(): 

1288 for begin, end in commonSkyPix.pixelization.envelope(region): 

1289 expected.update( 

1290 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph) 

1291 for index in range(begin, end) 

1292 ) 

1293 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1294 queried = set(registry.queryDataIds(graph)) 

1295 self.assertEqual(expected, queried) 

1296 
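# Illustrative sketch: the brute-force overlap expectation used above, with
# 1-D half-open intervals standing in for sky regions and ``disjoint``
# standing in for lsst.sphgeom's ``isDisjointFrom``. All names and values
# here are made up for illustration.
import itertools


def disjoint(a: tuple, b: tuple) -> bool:
    return a[1] <= b[0] or b[1] <= a[0]


tracts = {"t0": (0.0, 2.0), "t1": (3.0, 5.0)}
visits = {"v0": (1.0, 4.0), "v1": (6.0, 7.0)}
expected = {
    (t, v)
    for (t, rt), (v, rv) in itertools.product(tracts.items(), visits.items())
    if not disjoint(rt, rv)  # keep only genuinely overlapping pairs
}
assert expected == {("t0", "v0"), ("t1", "v0")}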

1297 def testAbstractQuery(self): 

1298 """Test that we can run a query that just lists the known 

1299 bands. This is tricky because band is 

1300 backed by a query against physical_filter. 

1301 """ 

1302 registry = self.makeRegistry() 

1303 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1304 registry.insertDimensionData( 

1305 "physical_filter", 

1306 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1307 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1308 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1309 ) 

1310 rows = registry.queryDataIds(["band"]).toSet() 

1311 self.assertCountEqual( 

1312 rows, 

1313 [ 

1314 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1315 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1316 ], 

1317 ) 

1318 

1319 def testAttributeManager(self): 

1320 """Test basic functionality of attribute manager.""" 

1321 # Number of attribute records in a fresh database: 6 managers with 

1322 # 3 records each, plus one config record for the dimensions. 

1323 VERSION_COUNT = 6 * 3 + 1 

1324 

1325 registry = self.makeRegistry() 

1326 attributes = registry._managers.attributes 

1327 

1328 # check what get() returns for non-existing key 

1329 self.assertIsNone(attributes.get("attr")) 

1330 self.assertEqual(attributes.get("attr", ""), "") 

1331 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1332 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1333 

1334 # cannot store empty key or value 

1335 with self.assertRaises(ValueError): 

1336 attributes.set("", "value") 

1337 with self.assertRaises(ValueError): 

1338 attributes.set("attr", "") 

1339 

1340 # set value of non-existing key 

1341 attributes.set("attr", "value") 

1342 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1343 self.assertEqual(attributes.get("attr"), "value") 

1344 

1345 # update value of existing key 

1346 with self.assertRaises(ButlerAttributeExistsError): 

1347 attributes.set("attr", "value2") 

1348 

1349 attributes.set("attr", "value2", force=True) 

1350 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1351 self.assertEqual(attributes.get("attr"), "value2") 

1352 

1353 # delete existing key 

1354 self.assertTrue(attributes.delete("attr")) 

1355 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1356 

1357 # delete non-existing key 

1358 self.assertFalse(attributes.delete("non-attr")) 

1359 

1360 # store a bunch of keys and get the list back 

1361 data = [ 

1362 ("version.core", "1.2.3"), 

1363 ("version.dimensions", "3.2.1"), 

1364 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1365 ] 

1366 for key, value in data: 

1367 attributes.set(key, value) 

1368 items = dict(attributes.items()) 

1369 for key, value in data: 

1370 self.assertEqual(items[key], value) 

1371 
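# Illustrative sketch: a dict-backed model of the attribute-manager contract
# exercised above (empty keys/values rejected, overwrite only with
# force=True, delete reporting whether the key existed). The class is a
# hypothetical stand-in; the real manager is database-backed.
class AttributeStoreModel:
    def __init__(self) -> None:
        self._data: dict = {}

    def get(self, name, default=None):
        return self._data.get(name, default)

    def set(self, name: str, value: str, *, force: bool = False) -> None:
        if not name or not value:
            raise ValueError("empty name or value is not allowed")
        if name in self._data and not force:
            raise RuntimeError(f"attribute {name!r} already exists")
        self._data[name] = value

    def delete(self, name: str) -> bool:
        return self._data.pop(name, None) is not None


store = AttributeStoreModel()
store.set("attr", "value")
store.set("attr", "value2", force=True)  # a plain set() would raise here
assert store.get("attr") == "value2" and store.delete("attr")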

1372 def testQueryDatasetsDeduplication(self): 

1373 """Test that the findFirst option to queryDatasets selects datasets 

1374 from collections in the order given. 

1375 """ 

1376 registry = self.makeRegistry() 

1377 self.loadData(registry, "base.yaml") 

1378 self.loadData(registry, "datasets.yaml") 

1379 self.assertCountEqual( 

1380 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1381 [ 

1382 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1383 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1384 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1385 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1386 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1387 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1388 ], 

1389 ) 

1390 self.assertCountEqual( 

1391 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1392 [ 

1393 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1394 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1395 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1396 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1397 ], 

1398 ) 

1399 self.assertCountEqual( 

1400 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1401 [ 

1402 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1403 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1404 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1405 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1406 ], 

1407 ) 

1408 
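# Illustrative sketch: a pure-Python model of the findFirst=True semantics
# checked above -- for each data ID, keep the dataset from the first
# collection (in the order given) that contains it. Integers stand in for
# detector data IDs and strings for dataset refs; all values are made up.
def find_first(collections: list) -> dict:
    result: dict = {}
    for datasets in collections:
        for data_id, ref in datasets.items():
            result.setdefault(data_id, ref)  # the first collection wins
    return result


imported_g = {1: "bias_g1", 2: "bias_g2", 3: "bias_g3"}
imported_r = {2: "bias_r2", 3: "bias_r3", 4: "bias_r4"}
assert find_first([imported_g, imported_r]) == {
    1: "bias_g1", 2: "bias_g2", 3: "bias_g3", 4: "bias_r4"
}
assert find_first([imported_r, imported_g])[2] == "bias_r2"  # order matters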

1409 def testQueryResults(self): 

1410 """Test querying for data IDs and then manipulating the QueryResults 

1411 object returned to perform other queries. 

1412 """ 

1413 registry = self.makeRegistry() 

1414 self.loadData(registry, "base.yaml") 

1415 self.loadData(registry, "datasets.yaml") 

1416 bias = registry.getDatasetType("bias") 

1417 flat = registry.getDatasetType("flat") 

1418 # Obtain expected results from methods other than those we're testing 

1419 # here. That includes: 

1420 # - the dimensions of the data IDs we want to query: 

1421 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1422 # - the dimensions of some other data IDs we'll extract from that: 

1423 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1424 # - the data IDs we expect to obtain from the first queries: 

1425 expectedDataIds = DataCoordinateSet( 

1426 { 

1427 DataCoordinate.standardize( 

1428 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1429 ) 

1430 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1431 }, 

1432 graph=expectedGraph, 

1433 hasFull=False, 

1434 hasRecords=False, 

1435 ) 

1436 # - the flat datasets we expect to find from those data IDs, in just 

1437 # one collection (so deduplication is irrelevant): 

1438 expectedFlats = [ 

1439 registry.findDataset( 

1440 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1441 ), 

1442 registry.findDataset( 

1443 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1444 ), 

1445 registry.findDataset( 

1446 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1447 ), 

1448 ] 

1449 # - the data IDs we expect to extract from that: 

1450 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1451 # - the bias datasets we expect to find from those data IDs, after we 

1452 # subset-out the physical_filter dimension, both with duplicates: 

1453 expectedAllBiases = [ 

1454 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1455 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1456 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1457 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1458 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1459 ] 

1460 # - ...and without duplicates: 

1461 expectedDeduplicatedBiases = [ 

1462 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1463 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1464 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1465 ] 

1466 # Test against those expected results, using a "lazy" query for the 

1467 # data IDs (which re-executes that query each time we use it to do 

1468 # something new). 

1469 dataIds = registry.queryDataIds( 

1470 ["detector", "physical_filter"], 

1471 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1472 instrument="Cam1", 

1473 ) 

1474 self.assertEqual(dataIds.graph, expectedGraph) 

1475 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1476 self.assertCountEqual( 

1477 list( 

1478 dataIds.findDatasets( 

1479 flat, 

1480 collections=["imported_r"], 

1481 ) 

1482 ), 

1483 expectedFlats, 

1484 ) 

1485 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1486 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1487 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1488 self.assertCountEqual( 

1489 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1490 expectedAllBiases, 

1491 ) 

1492 self.assertCountEqual( 

1493 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1494 expectedDeduplicatedBiases, 

1495 ) 

1496 

1497 # Check dimensions match. 

1498 with self.assertRaises(ValueError): 

1499 subsetDataIds.findDatasets("flat", collections=["imported_r", "imported_g"], findFirst=True) 

1500 

1501 # Use a component dataset type. 

1502 self.assertCountEqual( 

1503 [ 

1504 ref.makeComponentRef("image") 

1505 for ref in subsetDataIds.findDatasets( 

1506 bias, 

1507 collections=["imported_r", "imported_g"], 

1508 findFirst=False, 

1509 ) 

1510 ], 

1511 [ref.makeComponentRef("image") for ref in expectedAllBiases], 

1512 ) 

1513 

1514 # Use a named dataset type that does not exist and a dataset type 

1515 # object that does not exist. 

1516 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure") 

1517 

1518 # Test both string name and dataset type object. 

1519 test_type: Union[str, DatasetType] 

1520 for test_type, test_type_name in ( 

1521 (unknown_type, unknown_type.name), 

1522 (unknown_type.name, unknown_type.name), 

1523 ): 

1524 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name): 

1525 list( 

1526 subsetDataIds.findDatasets( 

1527 test_type, collections=["imported_r", "imported_g"], findFirst=True 

1528 ) 

1529 ) 

1530 

1531 # Materialize the bias dataset queries (only) by putting the results 

1532 # into temporary tables, then repeat those tests. 

1533 with subsetDataIds.findDatasets( 

1534 bias, collections=["imported_r", "imported_g"], findFirst=False 

1535 ).materialize() as biases: 

1536 self.assertCountEqual(list(biases), expectedAllBiases) 

1537 with subsetDataIds.findDatasets( 

1538 bias, collections=["imported_r", "imported_g"], findFirst=True 

1539 ).materialize() as biases: 

1540 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1541 # Materialize the data ID subset query, but not the dataset queries. 

1542 with subsetDataIds.materialize() as subsetDataIds: 

1543 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1544 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1545 self.assertCountEqual( 

1546 list( 

1547 subsetDataIds.findDatasets( 

1548 bias, collections=["imported_r", "imported_g"], findFirst=False 

1549 ) 

1550 ), 

1551 expectedAllBiases, 

1552 ) 

1553 self.assertCountEqual( 

1554 list( 

1555 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1556 ), 

1557 expectedDeduplicatedBiases, 

1558 ) 

1559 # Materialize the dataset queries, too. 

1560 with subsetDataIds.findDatasets( 

1561 bias, collections=["imported_r", "imported_g"], findFirst=False 

1562 ).materialize() as biases: 

1563 self.assertCountEqual(list(biases), expectedAllBiases) 

1564 with subsetDataIds.findDatasets( 

1565 bias, collections=["imported_r", "imported_g"], findFirst=True 

1566 ).materialize() as biases: 

1567 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1568 # Materialize the original query, but none of the follow-up queries. 

1569 with dataIds.materialize() as dataIds: 

1570 self.assertEqual(dataIds.graph, expectedGraph) 

1571 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1572 self.assertCountEqual( 

1573 list( 

1574 dataIds.findDatasets( 

1575 flat, 

1576 collections=["imported_r"], 

1577 ) 

1578 ), 

1579 expectedFlats, 

1580 ) 

1581 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1582 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1583 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1584 self.assertCountEqual( 

1585 list( 

1586 subsetDataIds.findDatasets( 

1587 bias, collections=["imported_r", "imported_g"], findFirst=False 

1588 ) 

1589 ), 

1590 expectedAllBiases, 

1591 ) 

1592 self.assertCountEqual( 

1593 list( 

1594 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1595 ), 

1596 expectedDeduplicatedBiases, 

1597 ) 

1598 # Materialize just the bias dataset queries. 

1599 with subsetDataIds.findDatasets( 

1600 bias, collections=["imported_r", "imported_g"], findFirst=False 

1601 ).materialize() as biases: 

1602 self.assertCountEqual(list(biases), expectedAllBiases) 

1603 with subsetDataIds.findDatasets( 

1604 bias, collections=["imported_r", "imported_g"], findFirst=True 

1605 ).materialize() as biases: 

1606 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1607 # Materialize the subset data ID query, but not the dataset 

1608 # queries. 

1609 with subsetDataIds.materialize() as subsetDataIds: 

1610 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1611 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1612 self.assertCountEqual( 

1613 list( 

1614 subsetDataIds.findDatasets( 

1615 bias, collections=["imported_r", "imported_g"], findFirst=False 

1616 ) 

1617 ), 

1618 expectedAllBiases, 

1619 ) 

1620 self.assertCountEqual( 

1621 list( 

1622 subsetDataIds.findDatasets( 

1623 bias, collections=["imported_r", "imported_g"], findFirst=True 

1624 ) 

1625 ), 

1626 expectedDeduplicatedBiases, 

1627 ) 

1628 # Materialize the bias dataset queries, too, so now we're 

1629 # materializing every single step. 

1630 with subsetDataIds.findDatasets( 

1631 bias, collections=["imported_r", "imported_g"], findFirst=False 

1632 ).materialize() as biases: 

1633 self.assertCountEqual(list(biases), expectedAllBiases) 

1634 with subsetDataIds.findDatasets( 

1635 bias, collections=["imported_r", "imported_g"], findFirst=True 

1636 ).materialize() as biases: 

1637 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1638 
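# Illustrative sketch: the materialize() usage pattern from the test above,
# modeled with a context manager that snapshots an otherwise lazily
# re-executed query. The real implementation writes a temporary table in
# the database; this hypothetical stand-in just caches rows in memory.
from contextlib import contextmanager


@contextmanager
def materialize(lazy_query):
    rows = list(lazy_query())  # execute exactly once, up front
    try:
        yield lambda: rows  # later uses read the snapshot
    finally:
        rows.clear()  # drop the "temporary table" on exit


executions = []


def lazy_query():
    executions.append(1)  # count how many times the query really runs
    return [1, 2, 3]


with materialize(lazy_query) as cached:
    assert cached() == [1, 2, 3] and cached() == [1, 2, 3]
assert len(executions) == 1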

1639 def testStorageClassPropagation(self): 

1640 """Test that queries for datasets respect the storage class passed in 

1641 as part of a full dataset type. 

1642 """ 

1643 registry = self.makeRegistry() 

1644 self.loadData(registry, "base.yaml") 

1645 dataset_type_in_registry = DatasetType( 

1646 "tbl", dimensions=["instrument"], storageClass="DataFrame", universe=registry.dimensions 

1647 ) 

1648 registry.registerDatasetType(dataset_type_in_registry) 

1649 run = "run1" 

1650 registry.registerRun(run) 

1651 (inserted_ref,) = registry.insertDatasets( 

1652 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run 

1653 ) 

1654 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry) 

1655 query_dataset_type = DatasetType( 

1656 "tbl", dimensions=["instrument"], storageClass="ArrowAstropy", universe=registry.dimensions 

1657 ) 

1658 self.assertNotEqual(dataset_type_in_registry, query_dataset_type) 

1659 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run]) 

1660 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore 

1661 (query_datasets_ref,) = query_datasets_result 

1662 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type) 

1663 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets( 

1664 query_dataset_type, collections=[run] 

1665 ) 

1666 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type) 

1667 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result 

1668 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type) 

1669 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type) 

1670 self.assertEqual(list(query_dataset_types_result), [query_dataset_type]) 

1671 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run]) 

1672 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type) 

1673 

1674 def testEmptyDimensionsQueries(self): 

1675 """Test Query and QueryResults objects in the case where there are no 

1676 dimensions. 

1677 """ 

1678 # Set up test data: one dataset type, two runs, one dataset in each. 

1679 registry = self.makeRegistry() 

1680 self.loadData(registry, "base.yaml") 

1681 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1682 registry.registerDatasetType(schema) 

1683 dataId = DataCoordinate.makeEmpty(registry.dimensions) 

1684 run1 = "run1" 

1685 run2 = "run2" 

1686 registry.registerRun(run1) 

1687 registry.registerRun(run2) 

1688 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1689 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1690 # Query directly for both of the datasets, and then for each one individually. 

1691 self.checkQueryResults( 

1692 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1693 ) 

1694 self.checkQueryResults( 

1695 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1696 [dataset1], 

1697 ) 

1698 self.checkQueryResults( 

1699 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1700 [dataset2], 

1701 ) 

1702 # Query for data IDs with no dimensions. 

1703 dataIds = registry.queryDataIds([]) 

1704 self.checkQueryResults(dataIds, [dataId]) 

1705 # Use queried data IDs to find the datasets. 

1706 self.checkQueryResults( 

1707 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1708 [dataset1, dataset2], 

1709 ) 

1710 self.checkQueryResults( 

1711 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1712 [dataset1], 

1713 ) 

1714 self.checkQueryResults( 

1715 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1716 [dataset2], 

1717 ) 

1718 # Now materialize the data ID query results and repeat those tests. 

1719 with dataIds.materialize() as dataIds: 

1720 self.checkQueryResults(dataIds, [dataId]) 

1721 self.checkQueryResults( 

1722 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1723 [dataset1], 

1724 ) 

1725 self.checkQueryResults( 

1726 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1727 [dataset2], 

1728 ) 

1729 # Query for non-empty data IDs, then subset that to get the empty one. 

1730 # Repeat the above tests starting from that. 

1731 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1732 self.checkQueryResults(dataIds, [dataId]) 

1733 self.checkQueryResults( 

1734 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1735 [dataset1, dataset2], 

1736 ) 

1737 self.checkQueryResults( 

1738 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1739 [dataset1], 

1740 ) 

1741 self.checkQueryResults( 

1742 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1743 [dataset2], 

1744 ) 

1745 with dataIds.materialize() as dataIds: 

1746 self.checkQueryResults(dataIds, [dataId]) 

1747 self.checkQueryResults( 

1748 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1749 [dataset1, dataset2], 

1750 ) 

1751 self.checkQueryResults( 

1752 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1753 [dataset1], 

1754 ) 

1755 self.checkQueryResults( 

1756 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1757 [dataset2], 

1758 ) 

1759 # Query for non-empty data IDs, then materialize, then subset to get 

1760 # the empty one. Repeat again. 

1761 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1762 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1763 self.checkQueryResults(dataIds, [dataId]) 

1764 self.checkQueryResults( 

1765 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1766 [dataset1, dataset2], 

1767 ) 

1768 self.checkQueryResults( 

1769 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1770 [dataset1], 

1771 ) 

1772 self.checkQueryResults( 

1773 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1774 [dataset2], 

1775 ) 

1776 with dataIds.materialize() as dataIds: 

1777 self.checkQueryResults(dataIds, [dataId]) 

1778 self.checkQueryResults( 

1779 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1780 [dataset1, dataset2], 

1781 ) 

1782 self.checkQueryResults( 

1783 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1784 [dataset1], 

1785 ) 

1786 self.checkQueryResults( 

1787 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1788 [dataset2], 

1789 ) 

1790 # Query for non-empty data IDs with a constraint on an empty-data-ID 

1791 # dataset that exists. 

1792 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...) 

1793 self.checkQueryResults( 

1794 dataIds.subset(unique=True), 

1795 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)], 

1796 ) 

1797 # Again query for non-empty data IDs with a constraint on empty-data-ID 

1798 # datasets, but when the datasets don't exist. We delete the existing 

1799 # dataset and query just that collection rather than creating a new 

1800 # empty collection because this is a bit less likely for our build-time 

1801 # logic to shortcut-out (via the collection summaries), and such a 

1802 # shortcut would make this test a bit more trivial than we'd like. 

1803 registry.removeDatasets([dataset2]) 

1804 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2) 

1805 self.checkQueryResults(dataIds, []) 

1806 
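# Illustrative sketch: subsetting data IDs to the empty dimension set, as
# exercised above -- any nonempty result collapses to the single empty data
# ID, while an empty result stays empty. Tuples of key/value pairs stand in
# for data IDs; the helper is hypothetical.
def subset_to_empty(data_ids: set) -> set:
    return {()} if data_ids else set()


assert subset_to_empty({(("instrument", "Cam1"),)}) == {()}
assert subset_to_empty(set()) == set()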

1807 def testDimensionDataModifications(self): 

1808 """Test that modifying dimension records via: 

1809 syncDimensionData(..., update=True) and 

1810 insertDimensionData(..., replace=True) works as expected, even in the 

1811 presence of datasets using those dimensions and spatial overlap 

1812 relationships. 

1813 """ 

1814 

1815 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1816 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1817 for begin, end in ranges: 

1818 yield from range(begin, end) 

1819 

1820 def range_set_hull( 

1821 ranges: lsst.sphgeom.RangeSet, 

1822 pixelization: lsst.sphgeom.HtmPixelization, 

1823 ) -> lsst.sphgeom.ConvexPolygon: 

1824 """Create a ConvexPolygon hull of the region defined by a set of 

1825 HTM pixelization index ranges. 

1826 """ 

1827 points = [] 

1828 for index in unpack_range_set(ranges): 

1829 points.extend(pixelization.triangle(index).getVertices()) 

1830 return lsst.sphgeom.ConvexPolygon(points) 

1831 

1832 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1833 # and four child regions (the trixels within the parent at the next 

1834 # level). We'll use the parent as a tract/visit region and the children 

1835 # as its patch/visit_detector regions. 

1836 registry = self.makeRegistry() 

1837 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1838 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1839 index = 12288 

1840 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 

1841 assert htm6.universe().contains(child_ranges_small) 

1842 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)] 

1843 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1844 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1845 ) 

1846 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1847 # Make a larger version of each child region, defined to be the set of 

1848 # htm6 trixels that overlap the original's bounding circle. Make a new 

1849 # parent that's the convex hull of the new children. 

1850 child_regions_large = [ 

1851 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1852 ] 

1853 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small)) 

1854 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1855 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1856 ) 

1857 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1858 assert parent_region_large.contains(parent_region_small) 

1859 assert not parent_region_small.contains(parent_region_large) 

1860 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1861 # Find some commonSkyPix indices that overlap the large regions but do 

1862 # not overlap the small regions. We use commonSkyPix here to make sure the 

1863 # real tests later involve what's in the database, not just post-query 

1864 # filtering of regions. 

1865 child_difference_indices = [] 

1866 for large, small in zip(child_regions_large, child_regions_small): 

1867 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1868 assert difference, "if this is empty, we can't test anything useful with these regions" 

1869 assert all( 

1870 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1871 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1872 for d in difference 

1873 ) 

1874 child_difference_indices.append(difference) 

1875 parent_difference_indices = list( 

1876 unpack_range_set( 

1877 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1878 ) 

1879 ) 

1880 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1881 assert all( 

1882 ( 

1883 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1884 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1885 ) 

1886 for d in parent_difference_indices 

1887 ) 

1888 # Now that we've finally got those regions, we'll insert the large ones 

1889 # as tract/patch dimension records. 

1890 skymap_name = "testing_v1" 

1891 registry.insertDimensionData( 

1892 "skymap", 

1893 { 

1894 "name": skymap_name, 

1895 "hash": bytes([42]), 

1896 "tract_max": 1, 

1897 "patch_nx_max": 2, 

1898 "patch_ny_max": 2, 

1899 }, 

1900 ) 

1901 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

1902 registry.insertDimensionData( 

1903 "patch", 

1904 *[ 

1905 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1906 for n, c in enumerate(child_regions_large) 

1907 ], 

1908 ) 

1909 # Add a dataset that uses these dimensions to make sure that modifying 

1910 # them doesn't disrupt foreign keys (need to make sure DB doesn't 

1911 # implement insert with replace=True as delete-then-insert). 

1912 dataset_type = DatasetType( 

1913 "coadd", 

1914 dimensions=["tract", "patch"], 

1915 universe=registry.dimensions, 

1916 storageClass="Exposure", 

1917 ) 

1918 registry.registerDatasetType(dataset_type) 

1919 registry.registerCollection("the_run", CollectionType.RUN) 

1920 registry.insertDatasets( 

1921 dataset_type, 

1922 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

1923 run="the_run", 

1924 ) 

1925 # Query for tracts and patches that overlap some "difference" 

1926 # commonSkyPix pixels; there should be overlaps, because the database has 

1927 # the "large" suite of regions. 

1928 self.assertEqual( 

1929 {0}, 

1930 { 

1931 data_id["tract"] 

1932 for data_id in registry.queryDataIds( 

1933 ["tract"], 

1934 skymap=skymap_name, 

1935 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1936 ) 

1937 }, 

1938 ) 

1939 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1940 self.assertIn( 

1941 patch_id, 

1942 { 

1943 data_id["patch"] 

1944 for data_id in registry.queryDataIds( 

1945 ["patch"], 

1946 skymap=skymap_name, 

1947 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1948 ) 

1949 }, 

1950 ) 

1951 # Use sync to update the tract region and insert to update the regions 

1952 # of the patches, to the "small" suite. 

1953 updated = registry.syncDimensionData( 

1954 "tract", 

1955 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

1956 update=True, 

1957 ) 

1958 self.assertEqual(updated, {"region": parent_region_large}) 

1959 registry.insertDimensionData( 

1960 "patch", 

1961 *[ 

1962 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1963 for n, c in enumerate(child_regions_small) 

1964 ], 

1965 replace=True, 

1966 ) 

1967 # Query again; there now should be no such overlaps, because the 

1968 # database has the "small" suite of regions. 

1969 self.assertFalse( 

1970 set( 

1971 registry.queryDataIds( 

1972 ["tract"], 

1973 skymap=skymap_name, 

1974 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1975 ) 

1976 ) 

1977 ) 

1978 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1979 self.assertNotIn( 

1980 patch_id, 

1981 { 

1982 data_id["patch"] 

1983 for data_id in registry.queryDataIds( 

1984 ["patch"], 

1985 skymap=skymap_name, 

1986 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1987 ) 

1988 }, 

1989 ) 

1990 # Update back to the large regions and query one more time. 

1991 updated = registry.syncDimensionData( 

1992 "tract", 

1993 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

1994 update=True, 

1995 ) 

1996 self.assertEqual(updated, {"region": parent_region_small}) 

1997 registry.insertDimensionData( 

1998 "patch", 

1999 *[ 

2000 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

2001 for n, c in enumerate(child_regions_large) 

2002 ], 

2003 replace=True, 

2004 ) 

2005 self.assertEqual( 

2006 {0}, 

2007 { 

2008 data_id["tract"] 

2009 for data_id in registry.queryDataIds( 

2010 ["tract"], 

2011 skymap=skymap_name, 

2012 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

2013 ) 

2014 }, 

2015 ) 

2016 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

2017 self.assertIn( 

2018 patch_id, 

2019 { 

2020 data_id["patch"] 

2021 for data_id in registry.queryDataIds( 

2022 ["patch"], 

2023 skymap=skymap_name, 

2024 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

2025 ) 

2026 }, 

2027 ) 

2028 
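# Illustrative sketch: computing "difference" pixel indices as plain integer
# sets -- indices covered by the large region's envelope but not the small
# one's, as in the test above. Integer ranges stand in for
# lsst.sphgeom.RangeSet contents; the values are made up.
def unpack(ranges: list) -> set:
    return {i for begin, end in ranges for i in range(begin, end)}


envelope_large = [(100, 108)]  # pixels overlapping the large region
envelope_small = [(102, 106)]  # pixels overlapping the small region
difference = unpack(envelope_large) - unpack(envelope_small)
assert difference == {100, 101, 106, 107}
# A query constrained to one of these indices should match only while the
# database holds the "large" suite of regions.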

2029 def testCalibrationCollections(self): 

2030 """Test operations on `~CollectionType.CALIBRATION` collections, 

2031 including `Registry.certify`, `Registry.decertify`, and 

2032 `Registry.findDataset`. 

2033 """ 

2034 # Setup - make a Registry, fill it with some datasets in 

2035 # non-calibration collections. 

2036 registry = self.makeRegistry() 

2037 self.loadData(registry, "base.yaml") 

2038 self.loadData(registry, "datasets.yaml") 

2039 # Set up some timestamps. 

2040 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

2041 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

2042 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

2043 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

2044 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

2045 allTimespans = [ 

2046 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

2047 ] 

2048 # Get references to some datasets. 

2049 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

2050 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

2051 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

2052 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

2053 # Register the main calibration collection we'll be working with. 

2054 collection = "Cam1/calibs/default" 

2055 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

2056 # Cannot associate into a calibration collection (no timespan). 

2057 with self.assertRaises(CollectionTypeError): 

2058 registry.associate(collection, [bias2a]) 

2059 # Certify 2a dataset with [t2, t4) validity. 

2060 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

2061 # Test that we can query for this dataset via the new collection, both 

2062 # on its own and with a RUN collection, as long as we don't try to join 

2063 # in temporal dimensions or use findFirst=True. 

2064 self.assertEqual( 

2065 set(registry.queryDatasets("bias", findFirst=False, collections=collection)), 

2066 {bias2a}, 

2067 ) 

2068 self.assertEqual( 

2069 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])), 

2070 { 

2071 bias2a, 

2072 bias2b, 

2073 bias3b, 

2074 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2075 }, 

2076 ) 

2077 self.assertEqual( 

2078 set(registry.queryDataIds("detector", datasets="bias", collections=collection)), 

2079 {registry.expandDataId(instrument="Cam1", detector=2)}, 

2080 ) 

2081 self.assertEqual( 

2082 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])), 

2083 { 

2084 registry.expandDataId(instrument="Cam1", detector=2), 

2085 registry.expandDataId(instrument="Cam1", detector=3), 

2086 registry.expandDataId(instrument="Cam1", detector=4), 

2087 }, 

2088 ) 

2089 

2090 # We should not be able to certify 2b with anything overlapping that 

2091 # window. 

2092 with self.assertRaises(ConflictingDefinitionError): 

2093 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

2094 with self.assertRaises(ConflictingDefinitionError): 

2095 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

2096 with self.assertRaises(ConflictingDefinitionError): 

2097 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

2098 with self.assertRaises(ConflictingDefinitionError): 

2099 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

2100 with self.assertRaises(ConflictingDefinitionError): 

2101 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

2102 with self.assertRaises(ConflictingDefinitionError): 

2103 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

2104 with self.assertRaises(ConflictingDefinitionError): 

2105 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

2106 with self.assertRaises(ConflictingDefinitionError): 

2107 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

2108 # We should be able to certify 3a with a range overlapping that window, 

2109 # because it's for a different detector. 

2110 # We'll certify 3a over [t1, t3). 

2111 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

2112 # Now we'll certify 2b and 3b together over [t4, ∞). 

2113 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

2114 

2115 # Fetch all associations and check that they are what we expect. 

2116 self.assertCountEqual( 

2117 list( 

2118 registry.queryDatasetAssociations( 

2119 "bias", 

2120 collections=[collection, "imported_g", "imported_r"], 

2121 ) 

2122 ), 

2123 [ 

2124 DatasetAssociation( 

2125 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

2126 collection="imported_g", 

2127 timespan=None, 

2128 ), 

2129 DatasetAssociation( 

2130 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2131 collection="imported_r", 

2132 timespan=None, 

2133 ), 

2134 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

2135 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

2136 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

2137 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

2138 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

2139 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

2140 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2141 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2142 ], 

2143 ) 

2144 

2145 class Ambiguous: 

2146 """Tag class to denote lookups that should be ambiguous.""" 

2147 

2148 pass 

2149 

2150 def assertLookup( 

2151 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]] 

2152 ) -> None: 

2153 """Local function that asserts that a bias lookup returns the given 

2154 expected result. 

2155 """ 

2156 if expected is Ambiguous: 

2157 with self.assertRaises((DatasetTypeError, LookupError)): 

2158 registry.findDataset( 

2159 "bias", 

2160 collections=collection, 

2161 instrument="Cam1", 

2162 detector=detector, 

2163 timespan=timespan, 

2164 ) 

2165 else: 

2166 self.assertEqual( 

2167 expected, 

2168 registry.findDataset( 

2169 "bias", 

2170 collections=collection, 

2171 instrument="Cam1", 

2172 detector=detector, 

2173 timespan=timespan, 

2174 ), 

2175 ) 

2176 

2177 # Systematically test lookups against expected results. 

2178 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2179 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2180 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2181 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2182 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

2183 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2184 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2185 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2186 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2187 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

2188 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2189 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2190 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2191 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

2192 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2193 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

2194 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

2195 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

2196 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

2197 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2198 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2199 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2200 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2201 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2202 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2203 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

2204 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2205 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2206 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2207 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2208 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

2209 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2210 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2211 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2212 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2213 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2214 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2215 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2216 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2217 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2218 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2219 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2220 

2221 # Decertify [t3, t5) for all data IDs, and do test lookups again. 

2222 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2223 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2224 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2225 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2226 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2227 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2228 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2229 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2230 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2231 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2232 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2233 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2234 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2235 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2236 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2237 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2238 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2239 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2240 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2241 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2242 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2243 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2244 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2245 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2246 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2247 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2248 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2249 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2250 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2251 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2252 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2253 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2254 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2255 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2256 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2257 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2258 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2259 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2260 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2261 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2262 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2263 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2264 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2265 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2266 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2267 

2268 # Decertify everything, this time with explicit data IDs, then check 

2269 # that no lookups succeed. 

2270 registry.decertify( 

2271 collection, 

2272 "bias", 

2273 Timespan(None, None), 

2274 dataIds=[ 

2275 dict(instrument="Cam1", detector=2), 

2276 dict(instrument="Cam1", detector=3), 

2277 ], 

2278 ) 

2279 for detector in (2, 3): 

2280 for timespan in allTimespans: 

2281 assertLookup(detector=detector, timespan=timespan, expected=None) 

2282 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2283 # those. 

2284 registry.certify( 

2285 collection, 

2286 [bias2a, bias3a], 

2287 Timespan(None, None), 

2288 ) 

2289 for timespan in allTimespans: 

2290 assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2291 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2292 # Decertify just bias2 over [t2, t4). 

2293 # This should split a single certification row into two (and leave the 

2294 # other existing row, for bias3a, alone). 

2295 registry.decertify( 

2296 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2297 ) 

2298 for timespan in allTimespans: 

2299 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2300 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2301 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2302 if overlapsBefore and overlapsAfter: 

2303 expected = Ambiguous 

2304 elif overlapsBefore or overlapsAfter: 

2305 expected = bias2a 

2306 else: 

2307 expected = None 

2308 assertLookup(detector=2, timespan=timespan, expected=expected) 

2309 
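# Illustrative sketch: a pure-Python model of certify/decertify bookkeeping
# for a single data ID, using half-open [begin, end) intervals with None
# meaning unbounded. Floats stand in for timestamps; the splitting behavior
# mirrors the decertification of bias2a over [t2, t4) above. Hypothetical.
import math


def _lo(t):
    return -math.inf if t is None else t


def _hi(t):
    return math.inf if t is None else t


def overlaps(a, b):
    return _lo(a[0]) < _hi(b[1]) and _lo(b[0]) < _hi(a[1])


def decertify(certified, cut):
    """Remove ``cut`` from each certification, splitting straddling rows."""
    result = []
    for (begin, end), ref in certified:
        if not overlaps((begin, end), cut):
            result.append(((begin, end), ref))
            continue
        if _lo(begin) < _lo(cut[0]):
            result.append(((begin, cut[0]), ref))  # piece before the cut
        if _hi(end) > _hi(cut[1]):
            result.append(((cut[1], end), ref))  # piece after the cut
    return result


# Certifying bias2a over (-inf, inf) and decertifying [2.0, 4.0) splits the
# single certification row into two, as the assertions above expect.
rows = decertify([((None, None), "bias2a")], (2.0, 4.0))
assert rows == [((None, 2.0), "bias2a"), ((4.0, None), "bias2a")]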

2310 def testSkipCalibs(self): 

2311 """Test how queries handle skipping of calibration collections.""" 

2312 registry = self.makeRegistry() 

2313 self.loadData(registry, "base.yaml") 

2314 self.loadData(registry, "datasets.yaml") 

2315 

2316 coll_calib = "Cam1/calibs/default" 

2317 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2318 

2319 # Add all biases to the calibration collection. 

2320 # Without this, the logic that prunes dataset subqueries based on 

2321 # datasetType-collection summary information will fire before the logic 

2322 # we want to test below. This is a good thing (it avoids the dreaded 

2323 # NotImplementedError a bit more often) everywhere but here. 

2324 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2325 

2326 coll_list = [coll_calib, "imported_g", "imported_r"] 

2327 chain = "Cam1/chain" 

2328 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2329 registry.setCollectionChain(chain, coll_list) 

2330 

2331 # explicit list will raise if findFirst=True or there are temporal 

2332 # dimensions 

2333 with self.assertRaises(NotImplementedError): 

2334 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2335 with self.assertRaises(NotImplementedError): 

2336 registry.queryDataIds( 

2337 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2338 ).count() 

2339 

2340 # chain will skip 

2341 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2342 self.assertGreater(len(datasets), 0) 

2343 

2344 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2345 self.assertGreater(len(dataIds), 0) 

2346 

2347 # glob will skip too 

2348 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2349 self.assertGreater(len(datasets), 0) 

2350 

2351 # regular expression will skip too 

2352 pattern = re.compile(".*") 

2353 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2354 self.assertGreater(len(datasets), 0) 

2355 

2356 # ellipsis should work as usual 

2357 datasets = list(registry.queryDatasets("bias", collections=...)) 

2358 self.assertGreater(len(datasets), 0) 

2359 

2360 # a few tests with findFirst 

2361 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2362 self.assertGreater(len(datasets), 0) 

2363 
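# Illustrative sketch: the skip-vs-raise behavior tested above. Searching a
# CALIBRATION collection in an unsupported way raises when the collection
# was listed explicitly, but is silently skipped when it was reached through
# a chain or pattern. All names below are hypothetical stand-ins.
def searchable(collections: list, types: dict, *, explicit: bool) -> list:
    result = []
    for name in collections:
        if types[name] == "CALIBRATION":
            if explicit:
                raise NotImplementedError(f"cannot search {name!r}")
            continue  # reached via chain/glob/regex: skip quietly
        result.append(name)
    return result


types = {"calibs": "CALIBRATION", "imported_g": "RUN", "imported_r": "RUN"}
assert searchable(["calibs", "imported_g"], types, explicit=False) == ["imported_g"]
try:
    searchable(["calibs", "imported_g"], types, explicit=True)
except NotImplementedError:
    pass  # explicit lists refuse rather than silently dropping results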

2364 def testIngestTimeQuery(self): 

2365 """Test that the dataset ingest_date can be used in query expressions."""

2366 registry = self.makeRegistry() 

2367 self.loadData(registry, "base.yaml") 

2368 dt0 = datetime.utcnow() 

2369 self.loadData(registry, "datasets.yaml") 

2370 dt1 = datetime.utcnow() 

2371 

2372 datasets = list(registry.queryDatasets(..., collections=...)) 

2373 len0 = len(datasets) 

2374 self.assertGreater(len0, 0) 

2375 

2376 where = "ingest_date > T'2000-01-01'" 

2377 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2378 len1 = len(datasets) 

2379 self.assertEqual(len0, len1) 

2380 

2381 # no one will ever use this piece of software in 30 years 

2382 where = "ingest_date > T'2050-01-01'" 

2383 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2384 len2 = len(datasets) 

2385 self.assertEqual(len2, 0) 

2386 

2387 # Check more exact timing to make sure there is no 37 seconds offset 

2388 # (after fixing DM-30124). SQLite time precision is 1 second; make 

2389 # sure that we don't test with higher precision. 

2390 tests = [ 

2391 # format: (timestamp, operator, expected_len) 

2392 (dt0 - timedelta(seconds=1), ">", len0), 

2393 (dt0 - timedelta(seconds=1), "<", 0), 

2394 (dt1 + timedelta(seconds=1), "<", len0), 

2395 (dt1 + timedelta(seconds=1), ">", 0), 

2396 ] 

2397 for dt, op, expect_len in tests: 

2398 dt_str = dt.isoformat(sep=" ") 

2399 

2400 where = f"ingest_date {op} T'{dt_str}'" 

2401 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2402 self.assertEqual(len(datasets), expect_len) 

2403 

2404 # same with bind using datetime or astropy Time 

2405 where = f"ingest_date {op} ingest_time" 

2406 datasets = list( 

2407 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2408 ) 

2409 self.assertEqual(len(datasets), expect_len) 

2410 

2411 dt_astropy = astropy.time.Time(dt, format="datetime") 

2412 datasets = list( 

2413 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2414 ) 

2415 self.assertEqual(len(datasets), expect_len) 

2416 
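# Illustrative sketch: building the ingest_date comparison strings used
# above. Time literals take the T'...' form, and the test pads by one
# second because SQLite stores ingest times at one-second precision. The
# timestamp below is made up.
from datetime import datetime, timedelta

dt = datetime(2023, 1, 12, 2, 5, 0)
where_literal = f"ingest_date > T'{(dt - timedelta(seconds=1)).isoformat(sep=' ')}'"
assert where_literal == "ingest_date > T'2023-01-12 02:04:59'"
# Equivalent form using a bind parameter instead of an inline literal:
where_bound, bind = "ingest_date > ingest_time", {"ingest_time": dt}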

2417 def testTimespanQueries(self): 

2418 """Test query expressions involving timespans.""" 

2419 registry = self.makeRegistry() 

2420 self.loadData(registry, "hsc-rc2-subset.yaml") 

2421 # All visits in the database; mapping from ID to timespan. 

2422 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2423 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2424 # visit IDs are monotonically increasing). 

2425 ids = sorted(visits.keys()) 

2426 self.assertGreater(len(ids), 20) 

2427 # Pick some quasi-random indexes into `ids` to play with. 

2428 i1 = int(len(ids) * 0.1) 

2429 i2 = int(len(ids) * 0.3) 

2430 i3 = int(len(ids) * 0.6) 

2431 i4 = int(len(ids) * 0.8) 

2432 # Extract some times from those: just before the beginning of i1 (which 

2433 # should be after the end of the previous visit), exactly the 

2434 # beginning of i2, just after the beginning of i3 (and before its end), 

2435 # and the exact end of i4. 

2436 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2437 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2438 t2 = visits[ids[i2]].begin 

2439 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2440 self.assertLess(t3, visits[ids[i3]].end) 

2441 t4 = visits[ids[i4]].end 

2442 # Make sure those are actually in order. 

2443 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2444 

2445 bind = { 

2446 "t1": t1, 

2447 "t2": t2, 

2448 "t3": t3, 

2449 "t4": t4, 

2450 "ts23": Timespan(t2, t3), 

2451 } 

2452 

2453 def query(where): 

2454 """Helper function that queries for visit data IDs and returns 

2455 results as a sorted, deduplicated list of visit IDs. 

2456 """ 

2457 return sorted( 

2458 { 

2459 dataId["visit"] 

2460 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2461 } 

2462 ) 

2463 

2464 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2465 # where they appear in the expression, and how we get the timespan into 

2466 # the expression. 

2467 

2468 # t1 is before the start of i1, so this should not include i1. 

2469 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2470 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2471 # should not include i2. 

2472 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2473 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2474 # t3 is in the middle of i3, so this should include i3. 

2475 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2477 # This one should not include i3, by the same reasoning. 

2477 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2478 # t4 is exactly at the end of i4, so this should include i4. 

2479 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2481 # i4's upper bound of t4 is exclusive, so this should not include i4. 

2481 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2482 

2483 # Now some timespan vs. time scalar queries. 

2484 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2485 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2486 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2487 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2488 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2489 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2490 

2491 # Empty timespans should not overlap anything. 

2492 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 

2493 
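
# A minimal sketch (not part of the test suite) of the half-open semantics the
# assertions above rely on: Timespan bounds behave as [begin, end), so a span
# ending exactly at t does not overlap a span beginning at t.
def _sketch_timespan_semantics():
    import astropy.time

    from lsst.daf.butler import Timespan

    t0 = astropy.time.Time("2020-01-01T00:00:00", format="isot", scale="tai")
    t1 = t0 + astropy.time.TimeDelta(60.0, format="sec")
    t2 = t1 + astropy.time.TimeDelta(60.0, format="sec")
    earlier = Timespan(t0, t1)
    later = Timespan(t1, t2)
    assert not earlier.overlaps(later)  # shared endpoint only: no overlap
    assert Timespan(None, None).overlaps(later)  # unbounded span overlaps everything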

2494 def testCollectionSummaries(self): 

2495 """Test recording and retrieval of collection summaries.""" 

2496 self.maxDiff = None 

2497 registry = self.makeRegistry() 

2498 # Importing datasets from yaml should go through the code path where 

2499 # we update collection summaries as we insert datasets. 

2500 self.loadData(registry, "base.yaml") 

2501 self.loadData(registry, "datasets.yaml") 

2502 flat = registry.getDatasetType("flat") 

2503 expected1 = CollectionSummary() 

2504 expected1.dataset_types.add(registry.getDatasetType("bias")) 

2505 expected1.add_data_ids( 

2506 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)] 

2507 ) 

2508 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2509 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2510 # Create a chained collection with both of the imported runs; the 

2511 # summary should be the same, because it's a union with itself. 

2512 chain = "chain" 

2513 registry.registerCollection(chain, CollectionType.CHAINED) 

2514 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2515 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2516 # Associate flats only into a tagged collection and a calibration 

2517 # collection to check summaries of those. 

2518 tag = "tag" 

2519 registry.registerCollection(tag, CollectionType.TAGGED) 

2520 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2521 calibs = "calibs" 

2522 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2523 registry.certify( 

2524 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2525 ) 

2526 expected2 = expected1.copy() 

2527 expected2.dataset_types.discard("bias") 

2528 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2529 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2530 # Explicitly calling Registry.refresh() should load those same 

2531 # summaries, via a totally different code path. 

2532 registry.refresh() 

2533 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2534 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2535 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2536 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2537 
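
# A minimal sketch of how a client might use the summaries tested above as a
# cheap pre-filter before a full dataset query; names are illustrative.
def _sketch_summary_prefilter(registry, collection, dataset_type_name):
    summary = registry.getCollectionSummary(collection)
    if dataset_type_name not in summary.dataset_types.names:
        # The summary proves this collection cannot contain such datasets,
        # so the more expensive query can be skipped entirely.
        return []
    return list(registry.queryDatasets(dataset_type_name, collections=collection))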

2538 def testBindInQueryDatasets(self): 

2539 """Test that the bind parameter is correctly forwarded in 

2540 queryDatasets recursion. 

2541 """ 

2542 registry = self.makeRegistry() 

2543 # Load some base data and datasets so that there is something to query 

2544 # against; forwarding of ``bind`` is what this test actually exercises. 

2545 self.loadData(registry, "base.yaml") 

2546 self.loadData(registry, "datasets.yaml") 

2547 self.assertEqual( 

2548 set(registry.queryDatasets("flat", band="r", collections=...)), 

2549 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2550 ) 

2551 
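
# A minimal sketch of the equivalence the test above asserts: ``bind``
# substitutes Python values for identifiers in the ``where`` string, which
# avoids hand-quoting literals.
def _sketch_bind_equivalence(registry):
    literal = set(registry.queryDatasets("flat", collections=..., where="band='r'"))
    bound = set(
        registry.queryDatasets(
            "flat", collections=..., where="band=my_band", bind={"my_band": "r"}
        )
    )
    assert literal == bound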

2552 def testQueryIntRangeExpressions(self): 

2553 """Test integer range expressions in ``where`` arguments. 

2554 

2555 Note that our expressions use inclusive stop values, unlike Python's. 

2556 """ 

2557 registry = self.makeRegistry() 

2558 self.loadData(registry, "base.yaml") 

2559 self.assertEqual( 

2560 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")), 

2561 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]}, 

2562 ) 

2563 self.assertEqual( 

2564 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")), 

2565 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]}, 

2566 ) 

2567 self.assertEqual( 

2568 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")), 

2569 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]}, 

2570 ) 

2571 
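
# A minimal sketch of the range syntax tested above: stop values are
# inclusive, so "(1..4:2)" matches detectors {1, 3}; the Python equivalent is
# range(start, stop + 1, stride).
def _sketch_range_equivalence(registry):
    ids = {
        data_id["detector"]
        for data_id in registry.queryDataIds(
            ["detector"], instrument="Cam1", where="detector IN (1..4:2)"
        )
    }
    assert ids == set(range(1, 4 + 1, 2))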

2572 def testQueryResultSummaries(self): 

2573 """Test summary methods like `count`, `any`, and `explain_no_results` 

2574 on `DataCoordinateQueryResults` and `DatasetQueryResults`. 

2575 """ 

2576 registry = self.makeRegistry() 

2577 self.loadData(registry, "base.yaml") 

2578 self.loadData(registry, "datasets.yaml") 

2579 self.loadData(registry, "spatial.yaml") 

2580 # Default test dataset has two collections, each with both flats and 

2581 # biases. Add a new collection with only biases. 

2582 registry.registerCollection("biases", CollectionType.TAGGED) 

2583 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2584 # First query yields two results, and involves no postprocessing. 

2585 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2586 self.assertTrue(query1.any(execute=False, exact=False)) 

2587 self.assertTrue(query1.any(execute=True, exact=False)) 

2588 self.assertTrue(query1.any(execute=True, exact=True)) 

2589 self.assertEqual(query1.count(exact=False), 2) 

2590 self.assertEqual(query1.count(exact=True), 2) 

2591 self.assertFalse(list(query1.explain_no_results())) 

2592 # Second query should yield no results, which we should see when 

2593 # we attempt to expand the data ID. 

2594 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2595 # There's no execute=False, exact=False test here because the behavior is 

2596 # not something we want to guarantee in this case (and exact=False 

2597 # says either answer is legal). 

2598 self.assertFalse(query2.any(execute=True, exact=False)) 

2599 self.assertFalse(query2.any(execute=True, exact=True)) 

2600 self.assertEqual(query2.count(exact=False), 0) 

2601 self.assertEqual(query2.count(exact=True), 0) 

2602 self.assertTrue(list(query2.explain_no_results())) 

2603 # These queries yield no results due to various problems that can be 

2604 # spotted prior to execution, yielding helpful diagnostics. 

2605 base_query = registry.queryDataIds(["detector", "physical_filter"]) 

2606 queries_and_snippets = [ 

2607 ( 

2608 # Dataset type name doesn't match any existing dataset types. 

2609 registry.queryDatasets("nonexistent", collections=...), 

2610 ["nonexistent"], 

2611 ), 

2612 ( 

2613 # Dataset type object isn't registered. 

2614 registry.queryDatasets( 

2615 DatasetType( 

2616 "nonexistent", 

2617 dimensions=["instrument"], 

2618 universe=registry.dimensions, 

2619 storageClass="Image", 

2620 ), 

2621 collections=..., 

2622 ), 

2623 ["nonexistent"], 

2624 ), 

2625 ( 

2626 # No datasets of this type in this collection. 

2627 registry.queryDatasets("flat", collections=["biases"]), 

2628 ["flat", "biases"], 

2629 ), 

2630 ( 

2631 # No datasets of this type in this collection. 

2632 base_query.findDatasets("flat", collections=["biases"]), 

2633 ["flat", "biases"], 

2634 ), 

2635 ( 

2636 # No collections matching at all. 

2637 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2638 ["potato"], 

2639 ), 

2640 ] 

2641 # The behavior of these additional queries is slated to change in the 

2642 # future, so we also check for deprecation warnings. 

2643 with self.assertWarns(FutureWarning): 

2644 queries_and_snippets.append( 

2645 ( 

2646 # Dataset type name doesn't match any existing dataset 

2647 # types. 

2648 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...), 

2649 ["nonexistent"], 

2650 ) 

2651 ) 

2652 with self.assertWarns(FutureWarning): 

2653 queries_and_snippets.append( 

2654 ( 

2655 # Dataset type name doesn't match any existing dataset 

2656 # types. 

2657 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...), 

2658 ["nonexistent"], 

2659 ) 

2660 ) 

2661 for query, snippets in queries_and_snippets: 

2662 self.assertFalse(query.any(execute=False, exact=False)) 

2663 self.assertFalse(query.any(execute=True, exact=False)) 

2664 self.assertFalse(query.any(execute=True, exact=True)) 

2665 self.assertEqual(query.count(exact=False), 0) 

2666 self.assertEqual(query.count(exact=True), 0) 

2667 messages = list(query.explain_no_results()) 

2668 self.assertTrue(messages) 

2669 # Want all expected snippets to appear in at least one message. 

2670 self.assertTrue( 

2671 any( 

2672 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2673 ), 

2674 messages, 

2675 ) 

2676 

2677 # This query does yield results, but should also emit a warning because 

2678 # passing dataset type patterns to queryDataIds is deprecated; just look for 

2679 # the warning. 

2680 with self.assertWarns(FutureWarning): 

2681 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...) 

2682 

2683 # These queries yield no results due to problems that can be identified 

2684 # by cheap follow-up queries, yielding helpful diagnostics. 

2685 for query, snippets in [ 

2686 ( 

2687 # No records for one of the involved dimensions. 

2688 registry.queryDataIds(["subfilter"]), 

2689 ["no rows", "subfilter"], 

2690 ), 

2691 ( 

2692 # No records for one of the involved dimensions. 

2693 registry.queryDimensionRecords("subfilter"), 

2694 ["no rows", "subfilter"], 

2695 ), 

2696 ]: 

2697 self.assertFalse(query.any(execute=True, exact=False)) 

2698 self.assertFalse(query.any(execute=True, exact=True)) 

2699 self.assertEqual(query.count(exact=True), 0) 

2700 messages = list(query.explain_no_results()) 

2701 self.assertTrue(messages) 

2702 # Want all expected snippets to appear in at least one message. 

2703 self.assertTrue( 

2704 any( 

2705 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2706 ), 

2707 messages, 

2708 ) 

2709 

2710 # This query yields four overlaps in the database, but one is filtered 

2711 # out in postprocessing. The count queries aren't accurate because 

2712 # they don't account for duplication that happens due to an internal 

2713 # join against commonSkyPix. 

2714 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2715 self.assertEqual( 

2716 { 

2717 DataCoordinate.standardize( 

2718 instrument="Cam1", 

2719 skymap="SkyMap1", 

2720 visit=v, 

2721 tract=t, 

2722 universe=registry.dimensions, 

2723 ) 

2724 for v, t in [(1, 0), (2, 0), (2, 1)] 

2725 }, 

2726 set(query3), 

2727 ) 

2728 self.assertTrue(query3.any(execute=False, exact=False)) 

2729 self.assertTrue(query3.any(execute=True, exact=False)) 

2730 self.assertTrue(query3.any(execute=True, exact=True)) 

2731 self.assertGreaterEqual(query3.count(exact=False), 4) 

2732 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3) 

2733 self.assertFalse(list(query3.explain_no_results())) 

2734 # This query yields overlaps in the database, but all are filtered 

2735 # out in postprocessing. The count queries again aren't very useful. 

2736 # We have to use `where=` here to avoid an optimization that 

2737 # (currently) skips the spatial postprocess-filtering because it 

2738 # recognizes that no spatial join is necessary. That's not ideal, but 

2739 # fixing it is out of scope for this ticket. 

2740 query4 = registry.queryDataIds( 

2741 ["visit", "tract"], 

2742 instrument="Cam1", 

2743 skymap="SkyMap1", 

2744 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2745 ) 

2746 self.assertFalse(set(query4)) 

2747 self.assertTrue(query4.any(execute=False, exact=False)) 

2748 self.assertTrue(query4.any(execute=True, exact=False)) 

2749 self.assertFalse(query4.any(execute=True, exact=True)) 

2750 self.assertGreaterEqual(query4.count(exact=False), 1) 

2751 self.assertEqual(query4.count(exact=True, discard=True), 0) 

2752 messages = query4.explain_no_results() 

2753 self.assertTrue(messages) 

2754 self.assertTrue(any("overlap" in message for message in messages)) 

2755 # This query should yield results from one dataset type but not the 

2756 # other, which is not registered. 

2757 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"]) 

2758 self.assertTrue(set(query5)) 

2759 self.assertTrue(query5.any(execute=False, exact=False)) 

2760 self.assertTrue(query5.any(execute=True, exact=False)) 

2761 self.assertTrue(query5.any(execute=True, exact=True)) 

2762 self.assertGreaterEqual(query5.count(exact=False), 1) 

2763 self.assertGreaterEqual(query5.count(exact=True), 1) 

2764 self.assertFalse(list(query5.explain_no_results())) 

2765 # This query applies a selection that yields no results, fully in the 

2766 # database. Explaining why it fails involves traversing the relation 

2767 # tree and running a LIMIT 1 query at each level that has the potential 

2768 # to remove rows. 

2769 query6 = registry.queryDimensionRecords( 

2770 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1" 

2771 ) 

2772 self.assertEqual(query6.count(exact=True), 0) 

2773 messages = query6.explain_no_results() 

2774 self.assertTrue(messages) 

2775 self.assertTrue(any("no-purpose" in message for message in messages)) 

2776 
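
# A minimal sketch of the diagnostic pattern exercised above: when a query
# comes back empty, ``explain_no_results()`` yields human-readable reasons
# that can be surfaced to users instead of a bare empty result.
def _sketch_diagnose_empty_query(query_results):
    if not query_results.any(execute=True, exact=True):
        for message in query_results.explain_no_results():
            print(message)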

2777 def testQueryDataIdsOrderBy(self): 

2778 """Test order_by and limit on result returned by queryDataIds().""" 

2779 registry = self.makeRegistry() 

2780 self.loadData(registry, "base.yaml") 

2781 self.loadData(registry, "datasets.yaml") 

2782 self.loadData(registry, "spatial.yaml") 

2783 

2784 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

2785 return registry.queryDataIds( 

2786 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

2787 ) 

2788 

2789 Test = namedtuple( 

2790 "testQueryDataIdsOrderByTest", 

2791 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

2792 defaults=(None, None, None), 

2793 ) 

2794 

2795 test_data = ( 

2796 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2797 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

2798 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

2799 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

2800 Test( 

2801 "tract.id,visit.id", 

2802 "tract,visit", 

2803 ((0, 1), (0, 1), (0, 2)), 

2804 limit=(3,), 

2805 ), 

2806 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

2807 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

2808 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

2809 Test( 

2810 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

2811 ), 

2812 Test( 

2813 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

2814 ), 

2815 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2816 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2817 Test( 

2818 "tract,-timespan.begin,timespan.end", 

2819 "tract,visit", 

2820 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

2821 ), 

2822 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

2823 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

2824 Test( 

2825 "tract,detector", 

2826 "tract,detector", 

2827 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2828 datasets="flat", 

2829 collections="imported_r", 

2830 ), 

2831 Test( 

2832 "tract,detector.full_name", 

2833 "tract,detector", 

2834 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2835 datasets="flat", 

2836 collections="imported_r", 

2837 ), 

2838 Test( 

2839 "tract,detector.raft,detector.name_in_raft", 

2840 "tract,detector", 

2841 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2842 datasets="flat", 

2843 collections="imported_r", 

2844 ), 

2845 ) 

2846 

2847 for test in test_data: 

2848 order_by = test.order_by.split(",") 

2849 keys = test.keys.split(",") 

2850 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

2851 if test.limit is not None: 

2852 query = query.limit(*test.limit) 

2853 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

2854 self.assertEqual(dataIds, test.result) 

2855 

2856 # and materialize 

2857 query = do_query(keys).order_by(*order_by) 

2858 if test.limit is not None: 

2859 query = query.limit(*test.limit) 

2860 with self.assertRaises(RelationalAlgebraError): 

2861 with query.materialize(): 

2862 pass 

2863 

2864 # errors in a name 

2865 for order_by in ("", "-"): 

2866 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2867 list(do_query().order_by(order_by)) 

2868 

2869 for order_by in ("undimension.name", "-undimension.name"): 

2870 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"): 

2871 list(do_query().order_by(order_by)) 

2872 

2873 for order_by in ("attract", "-attract"): 

2874 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

2875 list(do_query().order_by(order_by)) 

2876 

2877 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

2878 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

2879 

2880 with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimesion"): 

2881 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

2882 

2883 with self.assertRaisesRegex( 

2884 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

2885 ): 

2886 list(do_query(("tract")).order_by("timespan.begin")) 

2887 

2888 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

2889 list(do_query(("tract")).order_by("tract.timespan.begin")) 

2890 

2891 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

2892 list(do_query(("tract")).order_by("tract.name")) 

2893 
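
# A minimal sketch of the ordering API tested above: ``order_by`` accepts
# dimension and metadata names (a leading "-" means descending) and ``limit``
# takes a row count plus an optional offset.
def _sketch_order_and_limit(registry):
    query = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
    query = query.order_by("tract", "-visit.exposure_time").limit(3)
    return [(data_id["tract"], data_id["visit"]) for data_id in query]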

2894 def testQueryDataIdsGovernorExceptions(self): 

2895 """Test exceptions raised by queryDataIds() for incorrect governors.""" 

2896 registry = self.makeRegistry() 

2897 self.loadData(registry, "base.yaml") 

2898 self.loadData(registry, "datasets.yaml") 

2899 self.loadData(registry, "spatial.yaml") 

2900 

2901 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs): 

2902 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs) 

2903 

2904 Test = namedtuple( 

2905 "testQueryDataIdExceptionsTest", 

2906 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"), 

2907 defaults=(None, None, None, {}, None, 0), 

2908 ) 

2909 

2910 test_data = ( 

2911 Test("tract,visit", count=6), 

2912 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2913 Test( 

2914 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError 

2915 ), 

2916 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2917 Test( 

2918 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError 

2919 ), 

2920 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6), 

2921 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError), 

2922 Test( 

2923 "tract,visit", 

2924 where="instrument=cam AND skymap=map", 

2925 bind={"cam": "Cam1", "map": "SkyMap1"}, 

2926 count=6, 

2927 ), 

2928 Test( 

2929 "tract,visit", 

2930 where="instrument=cam AND skymap=map", 

2931 bind={"cam": "Cam", "map": "SkyMap"}, 

2932 exception=DataIdValueError, 

2933 ), 

2934 ) 

2935 

2936 for test in test_data: 

2937 dimensions = test.dimensions.split(",") 

2938 if test.exception: 

2939 with self.assertRaises(test.exception): 

2940 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() 

2941 else: 

2942 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2943 self.assertEqual(query.count(discard=True), test.count) 

2944 

2945 # and materialize 

2946 if test.exception: 

2947 with self.assertRaises(test.exception): 

2948 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2949 with query.materialize() as materialized: 

2950 materialized.count(discard=True) 

2951 else: 

2952 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2953 with query.materialize() as materialized: 

2954 self.assertEqual(materialized.count(discard=True), test.count) 

2955 
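
# A minimal sketch of the ``materialize()`` pattern used above: the query is
# executed up front (typically into a temporary table) and the context
# manager bounds that storage's lifetime; counts then read the stored rows.
def _sketch_materialize(registry):
    query = registry.queryDataIds(["tract", "visit"], instrument="Cam1", skymap="SkyMap1")
    with query.materialize() as materialized:
        return materialized.count(discard=True)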

2956 def testQueryDimensionRecordsOrderBy(self): 

2957 """Test order_by and limit on result returned by 

2958 queryDimensionRecords(). 

2959 """ 

2960 registry = self.makeRegistry() 

2961 self.loadData(registry, "base.yaml") 

2962 self.loadData(registry, "datasets.yaml") 

2963 self.loadData(registry, "spatial.yaml") 

2964 

2965 def do_query(element, datasets=None, collections=None): 

2966 return registry.queryDimensionRecords( 

2967 element, instrument="Cam1", datasets=datasets, collections=collections 

2968 ) 

2969 

2970 query = do_query("detector") 

2971 self.assertEqual(len(list(query)), 4) 

2972 

2973 Test = namedtuple( 

2974 "testQueryDataIdsOrderByTest", 

2975 ("element", "order_by", "result", "limit", "datasets", "collections"), 

2976 defaults=(None, None, None), 

2977 ) 

2978 

2979 test_data = ( 

2980 Test("detector", "detector", (1, 2, 3, 4)), 

2981 Test("detector", "-detector", (4, 3, 2, 1)), 

2982 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)), 

2983 Test("detector", "-detector.purpose", (4,), limit=(1,)), 

2984 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)), 

2985 Test("visit", "visit", (1, 2)), 

2986 Test("visit", "-visit.id", (2, 1)), 

2987 Test("visit", "zenith_angle", (1, 2)), 

2988 Test("visit", "-visit.name", (2, 1)), 

2989 Test("visit", "day_obs,-timespan.begin", (2, 1)), 

2990 ) 

2991 

2992 for test in test_data: 

2993 order_by = test.order_by.split(",") 

2994 query = do_query(test.element).order_by(*order_by) 

2995 if test.limit is not None: 

2996 query = query.limit(*test.limit) 

2997 dataIds = tuple(rec.id for rec in query) 

2998 self.assertEqual(dataIds, test.result) 

2999 

3000 # errors in a name 

3001 for order_by in ("", "-"): 

3002 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

3003 list(do_query("detector").order_by(order_by)) 

3004 

3005 for order_by in ("undimension.name", "-undimension.name"): 

3006 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"): 

3007 list(do_query("detector").order_by(order_by)) 

3008 

3009 for order_by in ("attract", "-attract"): 

3010 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."): 

3011 list(do_query("detector").order_by(order_by)) 

3012 
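
# A minimal sketch of the record-sorting shown above: dimension records can
# be ordered by metadata fields of the element itself, e.g. detectors by raft
# and then by name within the raft.
def _sketch_record_order_by(registry):
    query = registry.queryDimensionRecords("detector", instrument="Cam1")
    return [rec.full_name for rec in query.order_by("raft", "name_in_raft")]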

3013 def testQueryDimensionRecordsExceptions(self): 

3014 """Test exceptions raised by queryDimensionRecords().""" 

3015 registry = self.makeRegistry() 

3016 self.loadData(registry, "base.yaml") 

3017 self.loadData(registry, "datasets.yaml") 

3018 self.loadData(registry, "spatial.yaml") 

3019 

3020 result = registry.queryDimensionRecords("detector") 

3021 self.assertEqual(result.count(), 4) 

3022 result = registry.queryDimensionRecords("detector", instrument="Cam1") 

3023 self.assertEqual(result.count(), 4) 

3024 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"}) 

3025 self.assertEqual(result.count(), 4) 

3026 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'") 

3027 self.assertEqual(result.count(), 4) 

3028 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"}) 

3029 self.assertEqual(result.count(), 4) 

3030 

3031 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3032 result = registry.queryDimensionRecords("detector", instrument="NotCam1") 

3033 result.count() 

3034 

3035 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3036 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) 

3037 result.count() 

3038 

3039 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3040 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") 

3041 result.count() 

3042 

3043 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3044 result = registry.queryDimensionRecords( 

3045 "detector", where="instrument=instr", bind={"instr": "NotCam1"} 

3046 ) 

3047 result.count() 

3048 
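
# A minimal sketch of handling the failure mode tested above: unknown
# governor values surface as DataIdValueError, which callers can catch to
# distinguish "no such instrument" from "no rows". The import path is assumed
# to match this package's public registry module.
def _sketch_handle_bad_governor(registry, instrument_name):
    from lsst.daf.butler.registry import DataIdValueError

    try:
        return registry.queryDimensionRecords("detector", instrument=instrument_name).count()
    except DataIdValueError:
        return None  # the instrument does not exist in this repository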

3049 def testDatasetConstrainedDimensionRecordQueries(self): 

3050 """Test that queryDimensionRecords works even when given a dataset 

3051 constraint whose dimensions extend beyond the requested dimension 

3052 element's. 

3053 """ 

3054 registry = self.makeRegistry() 

3055 self.loadData(registry, "base.yaml") 

3056 self.loadData(registry, "datasets.yaml") 

3057 # Query for physical_filter dimension records, using a dataset type 

3058 # whose dimensions extend beyond physical_filter's (flat also has detector). 

3059 records = registry.queryDimensionRecords( 

3060 "physical_filter", 

3061 datasets=["flat"], 

3062 collections="imported_r", 

3063 ) 

3064 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"}) 

3065 # Trying to constrain by all dataset types is an error. 

3066 with self.assertRaises(TypeError): 

3067 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r")) 

3068 

3069 def testSkyPixDatasetQueries(self): 

3070 """Test that we can build queries involving skypix dimensions as long 

3071 as a dataset type that uses those dimensions is included. 

3072 """ 

3073 registry = self.makeRegistry() 

3074 self.loadData(registry, "base.yaml") 

3075 dataset_type = DatasetType( 

3076 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int" 

3077 ) 

3078 registry.registerDatasetType(dataset_type) 

3079 run = "r" 

3080 registry.registerRun(run) 

3081 # First try queries where there are no datasets; the concern is whether 

3082 # we can even build and execute these queries without raising, even 

3083 # when "doomed" query shortcuts are in play. 

3084 self.assertFalse( 

3085 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)) 

3086 ) 

3087 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run))) 

3088 # Now add a dataset and see that we can get it back. 

3089 htm7 = registry.dimensions.skypix["htm"][7].pixelization 

3090 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0]) 

3091 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run) 

3092 self.assertEqual( 

3093 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)), 

3094 {data_id}, 

3095 ) 

3096 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref}) 

3097 
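
# A minimal sketch (assuming the lsst.sphgeom API, where RangeSet iterates as
# (begin, end) pairs): the pixelization object used above can also map a
# sphgeom region to the htm7 indices covering it.
def _sketch_htm7_ids_for_region(registry, region):
    htm7 = registry.dimensions.skypix["htm"][7].pixelization
    for begin, end in htm7.envelope(region):
        yield from range(begin, end)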

3098 def testDatasetIdFactory(self): 

3099 """Simple test for DatasetIdFactory, mostly to catch potential changes 

3100 in its API. 

3101 """ 

3102 registry = self.makeRegistry() 

3103 factory = registry.datasetIdFactory 

3104 dataset_type = DatasetType( 

3105 "datasetType", 

3106 dimensions=["detector", "instrument"], 

3107 universe=registry.dimensions, 

3108 storageClass="int", 

3109 ) 

3110 run = "run" 

3111 data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions) 

3112 

3113 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE) 

3114 self.assertIsInstance(datasetId, uuid.UUID) 

3115 self.assertEqual(datasetId.version, 4) 

3116 

3117 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE) 

3118 self.assertIsInstance(datasetId, uuid.UUID) 

3119 self.assertEqual(datasetId.version, 5) 

3120 

3121 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN) 

3122 self.assertIsInstance(datasetId, uuid.UUID) 

3123 self.assertEqual(datasetId.version, 5)
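
# A minimal sketch of what the UUID versions checked above imply: UNIQUE
# produces random (version-4) IDs, while the DATAID_TYPE* modes are
# name-based (version-5) and therefore deterministic, so repeating the call
# with identical inputs yields the same ID.
def _sketch_id_determinism(factory, run, dataset_type, data_id):
    from lsst.daf.butler.registry.interfaces import DatasetIdGenEnum

    a = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
    b = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
    assert a == b  # reproducible
    u1 = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
    u2 = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
    assert u1 != u2  # random, so effectively always distinct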