Coverage for python/lsst/daf/butler/registry/tests/_registry.py: 4% (1426 statements)

coverage.py v7.2.4, created at 2023-04-29 02:58 -0700

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    SkyPixDimension,
    StorageClass,
    Timespan,
    ddl,
)
from .._collection_summary import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from .._registry import Registry

class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str | dict[str, str]] = None
    """Name or configuration dictionary of the datasets manager class. If a
    subclass provides a value for this member, it overrides the name
    specified in the default configuration (`str` or `dict`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self, share_repo_with: Optional[Registry] = None) -> Optional[Registry]:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with` is
            not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()
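
    # Illustrative sketch (not part of the suite): a concrete subclass is
    # expected to mix this ABC into a `unittest.TestCase` and implement the
    # two hooks above.  The class name and SQLite URI below are hypothetical.
    #
    #     class SqliteRegistryTests(RegistryTests, unittest.TestCase):
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data")
    #
    #         def makeRegistry(self, share_repo_with=None):
    #             if share_repo_with is not None:
    #                 return None  # in-memory SQLite cannot share a repo
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"
    #             return Registry.createFromConfig(config)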

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)
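
    # Note: the YAML fixtures referenced throughout (e.g. ``base.yaml``, which
    # provides instrument "Cam1" with its detectors and physical filters plus
    # the "bias" and "flat" dataset types, and ``datasets.yaml``, which adds
    # the "imported_g" and "imported_r" runs) live under `getDataDir` and are
    # loaded on demand by the individual tests below.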

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause which exceeds the SQLite limit on the
        # number of parameters.  SQLite says the limit is 32k, but it looks
        # like it is much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, the second has matching elements in different batches
        # (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))
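
    # Note: as exercised above, `fetchOpaqueData` keyword arguments act as
    # filters that are ANDed together, and sequence values translate to
    # (batched) SQL IN clauses.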

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert.
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True.
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work...
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # ...except when the definitions are not identical.
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Register a second dataset type with a different storage class.
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # A missing required dependency ("instrument") should fail.
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding the required dependency should fix the failure.
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral.
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
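        # The visit_definition record above ties exposure 1 (only) to visit 1,
        # so a data ID combining visit 1 with exposure 2 contradicts the
        # recorded relationship and should be rejected.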

        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises.
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None.
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, in which two have the right
        # dataset type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets-uuid.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset IDs."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict):
            if not self.datasetsManager["cls"].endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # The UUID is used without change.
        self.assertEqual(ref.id, ref1.id)

        # All the different failure modes:
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error.
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId.
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run.
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test non-unique IDs; they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Use an integer dataset ID to force UUID calculation in
                # _importDatasets.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                (ref1,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
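                # (Version-5 UUIDs are deterministic name-based hashes:
                # DATAID_TYPE derives the ID from the dataset type and data
                # ID, while DATAID_TYPE_RUN additionally mixes in the run
                # name.  That determinism is what makes the re-import below
                # idempotent.)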

                # Importing it again is OK.
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to a different run with the same ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run.
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run.
                    (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "flat", "bias.wcs", "flat.photoCalib"},
                NamedValueSet(registry.queryDatasetTypes(components=True)).names,
            )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
            )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
            )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
            )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={
                "data1": registry.storageClasses.getStorageClass("StructuredDataDict"),
                "data2": registry.storageClasses.getStorageClass("StructuredDataDict"),
            },
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertWarns(FutureWarning):
            with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
                everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        with self.assertWarns(FutureWarning):
            dataIds = registry.queryDataIds(
                ["detector"],
                datasets=["bias.wcs"],
                collections=collection,
            ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        with self.assertWarns(FutureWarning):
            childRefs2 = set(
                registry.queryDatasets(
                    "bias.wcs",
                    collections=collection,
                )
            )
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2}, {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time along
        # with a dataset that isn't in the collection and won't cause a
        # conflict.  Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2].
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # The chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
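        # (Chained collections are searched in order: a lookup in chain1
        # checks tag1 first and falls back to run2, as the assertions below
        # demonstrate.)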

        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  It should be found in chain2 as
        # well, via the run2 member at the front of the chain.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist.
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins
        to skymap.
        """
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # Dataset types.
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # Add pre-existing datasets.
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the different collections,
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dimension string works as well as a list of str.
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # With an empty expression.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # Second collection.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # With two input collections.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # Limit to a single visit.
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # A more limiting expression, using link names instead of
        # Table.column.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # An expression that excludes everything.
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter; it is not in the dimensions, but it
        # is a part of the full expression so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

1131 def testSkyMapDimensions(self): 

1132 """Tests involving only skymap dimensions, no joins to instrument.""" 

1133 registry = self.makeRegistry() 

1134 

1135 # need a bunch of dimensions and datasets for test, we want 

1136 # "band" in the test so also have to add physical_filter 

1137 # dimensions 

1138 registry.insertDimensionData("instrument", dict(instrument="DummyCam")) 

1139 registry.insertDimensionData( 

1140 "physical_filter", 

1141 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1142 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1143 ) 

1144 registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8"))) 

1145 for tract in range(10): 

1146 registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract)) 

1147 registry.insertDimensionData( 

1148 "patch", 

1149 *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)], 

1150 ) 

1151 

1152 # dataset types 

1153 run = "tésτ" 

1154 registry.registerRun(run) 

1155 storageClass = StorageClass("testDataset") 

1156 registry.storageClasses.registerStorageClass(storageClass) 

1157 calexpType = DatasetType( 

1158 name="deepCoadd_calexp", 

1159 dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")), 

1160 storageClass=storageClass, 

1161 ) 

1162 registry.registerDatasetType(calexpType) 

1163 mergeType = DatasetType( 

1164 name="deepCoadd_mergeDet", 

1165 dimensions=registry.dimensions.extract(("skymap", "tract", "patch")), 

1166 storageClass=storageClass, 

1167 ) 

1168 registry.registerDatasetType(mergeType) 

1169 measType = DatasetType( 

1170 name="deepCoadd_meas", 

1171 dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")), 

1172 storageClass=storageClass, 

1173 ) 

1174 registry.registerDatasetType(measType) 

1175 

1176 dimensions = DimensionGraph( 

1177 registry.dimensions, 

1178 dimensions=( 

1179 calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required 

1180 ), 

1181 ) 

1182 

1183 # add pre-existing datasets 

1184 for tract in (1, 3, 5): 

1185 for patch in (2, 4, 6, 7): 

1186 dataId = dict(skymap="DummyMap", tract=tract, patch=patch) 

1187 registry.insertDatasets(mergeType, dataIds=[dataId], run=run) 

1188 for aFilter in ("i", "r"): 

1189 dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter) 

1190 registry.insertDatasets(calexpType, dataIds=[dataId], run=run) 

1191 

1192 # with empty expression 

1193 rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet() 

1194 self.assertEqual(len(rows), 3 * 4 * 2) # 4 tracts x 4 patches x 2 filters 

1195 for dataId in rows: 

1196 self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band")) 

1197 self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5)) 

1198 self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7)) 

1199 self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r")) 

1200 

1201 # limit to 2 tracts and 2 patches 

1202 rows = registry.queryDataIds( 

1203 dimensions, 

1204 datasets=[calexpType, mergeType], 

1205 collections=run, 

1206 where="tract IN (1, 5) AND patch IN (2, 7)", 

1207 skymap="DummyMap", 

1208 ).toSet() 

1209 self.assertEqual(len(rows), 2 * 2 * 2) # 2 tracts x 2 patches x 2 filters 

1210 self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5)) 

1211 self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7)) 

1212 self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r")) 

1213 

1214 # limit to single filter 

1215 rows = registry.queryDataIds( 

1216 dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'" 

1217 ).toSet() 

1218 self.assertEqual(len(rows), 3 * 4 * 1) # 3 tracts x 4 patches x 1 filter 

1219 self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5)) 

1220 self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7)) 

1221 self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",)) 

1222 

1223 # Specifying a non-existent skymap raises an exception. 

1224 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

1225 rows = registry.queryDataIds( 

1226 dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'" 

1227 ).toSet() 

1228 

1229 def testSpatialJoin(self): 

1230 """Test queries that involve spatial overlap joins.""" 

1231 registry = self.makeRegistry() 

1232 self.loadData(registry, "hsc-rc2-subset.yaml") 

1233 

1234 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of 

1235 # the TopologicalFamily they belong to. We'll relate all elements in 

1236 # each family to all of the elements in each other family. 

1237 families = defaultdict(set) 

1238 # Dictionary of {element.name: {dataId: region}}. 

1239 regions = {} 

1240 for element in registry.dimensions.getDatabaseElements(): 

1241 if element.spatial is not None: 

1242 families[element.spatial.name].add(element) 

1243 regions[element.name] = { 

1244 record.dataId: record.region for record in registry.queryDimensionRecords(element) 

1245 } 

1246 

1247 # If this check fails, it's not necessarily a problem - it may just be 

1248 # a reasonable change to the default dimension definitions - but the 

1249 # test below depends on there being more than one family to do anything 

1250 # useful. 

1251 self.assertEqual(len(families), 2) 
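
# With the default dimension universe these are expected to be the skymap

# family (tract, patch) and the observation family (visit,

# visit_detector_region).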

1252 

1253 # Overlap DatabaseDimensionElements with each other. 

1254 for family1, family2 in itertools.combinations(families, 2): 

1255 for element1, element2 in itertools.product(families[family1], families[family2]): 

1256 graph = DimensionGraph.union(element1.graph, element2.graph) 

1257 # Construct expected set of overlapping data IDs via a 

1258 # brute-force comparison of the regions we've already fetched. 

1259 expected = { 

1260 DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph) 

1261 for (dataId1, region1), (dataId2, region2) in itertools.product( 

1262 regions[element1.name].items(), regions[element2.name].items() 

1263 ) 

1264 if not region1.isDisjointFrom(region2) 

1265 } 

1266 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1267 queried = set(registry.queryDataIds(graph)) 

1268 self.assertEqual(expected, queried) 

1269 

1270 # Overlap each DatabaseDimensionElement with the commonSkyPix system. 

1271 commonSkyPix = registry.dimensions.commonSkyPix 

1272 for elementName, element_regions in regions.items(): 

1273 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph) 

1274 expected = set() 

1275 for dataId, region in element_regions.items(): 
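
# envelope() returns index ranges for pixels that may overlap the region

# (a conservative superset), which should match the overlap rows the

# database stores for these elements.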

1276 for begin, end in commonSkyPix.pixelization.envelope(region): 

1277 expected.update( 

1278 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph) 

1279 for index in range(begin, end) 

1280 ) 

1281 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1282 queried = set(registry.queryDataIds(graph)) 

1283 self.assertEqual(expected, queried) 

1284 

1285 def testAbstractQuery(self): 

1286 """Test that we can run a query that just lists the known 

1287 bands. This is tricky because band is 

1288 backed by a query against physical_filter. 

1289 """ 

1290 registry = self.makeRegistry() 

1291 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1292 registry.insertDimensionData( 

1293 "physical_filter", 

1294 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1295 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1296 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1297 ) 

1298 rows = registry.queryDataIds(["band"]).toSet() 

1299 self.assertCountEqual( 

1300 rows, 

1301 [ 

1302 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1303 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1304 ], 

1305 ) 

1306 

1307 def testAttributeManager(self): 

1308 """Test basic functionality of attribute manager.""" 

1309 # number of attributes with schema versions in a fresh database, 

1310 # 6 managers with 2 records per manager, plus config for dimensions 

1311 VERSION_COUNT = 6 * 2 + 1 
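
# (the two records per manager are expected to be its class name and its

# schema version; the extra record is the dimensions configuration)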

1312 

1313 registry = self.makeRegistry() 

1314 attributes = registry._managers.attributes 

1315 

1316 # check what get() returns for non-existing key 

1317 self.assertIsNone(attributes.get("attr")) 

1318 self.assertEqual(attributes.get("attr", ""), "") 

1319 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1320 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1321 

1322 # cannot store empty key or value 

1323 with self.assertRaises(ValueError): 

1324 attributes.set("", "value") 

1325 with self.assertRaises(ValueError): 

1326 attributes.set("attr", "") 

1327 

1328 # set value of non-existing key 

1329 attributes.set("attr", "value") 

1330 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1331 self.assertEqual(attributes.get("attr"), "value") 

1332 

1333 # update value of existing key 

1334 with self.assertRaises(ButlerAttributeExistsError): 

1335 attributes.set("attr", "value2") 

1336 

1337 attributes.set("attr", "value2", force=True) 

1338 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1339 self.assertEqual(attributes.get("attr"), "value2") 

1340 

1341 # delete existing key 

1342 self.assertTrue(attributes.delete("attr")) 

1343 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1344 

1345 # delete non-existing key 

1346 self.assertFalse(attributes.delete("non-attr")) 

1347 

1348 # store bunch of keys and get the list back 

1349 data = [ 

1350 ("version.core", "1.2.3"), 

1351 ("version.dimensions", "3.2.1"), 

1352 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1353 ] 

1354 for key, value in data: 

1355 attributes.set(key, value) 

1356 items = dict(attributes.items()) 

1357 for key, value in data: 

1358 self.assertEqual(items[key], value) 

1359 

1360 def testQueryDatasetsDeduplication(self): 

1361 """Test that the findFirst option to queryDatasets selects datasets 

1362 from collections in the order given. 

1363 """ 

1364 registry = self.makeRegistry() 

1365 self.loadData(registry, "base.yaml") 

1366 self.loadData(registry, "datasets.yaml") 

1367 self.assertCountEqual( 

1368 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1369 [ 

1370 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1371 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1372 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1373 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1374 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1375 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1376 ], 

1377 ) 

1378 self.assertCountEqual( 

1379 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1380 [ 

1381 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1382 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1383 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1384 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1385 ], 

1386 ) 
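
# With the collection search order reversed, detectors 2 and 3 now resolve

# to imported_r; detector 1 still comes from imported_g because imported_r

# has no detector=1 bias.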

1387 self.assertCountEqual( 

1388 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1389 [ 

1390 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1391 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1392 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1393 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1394 ], 

1395 ) 

1396 

1397 def testQueryResults(self): 

1398 """Test querying for data IDs and then manipulating the QueryResults 

1399 object returned to perform other queries. 

1400 """ 

1401 registry = self.makeRegistry() 

1402 self.loadData(registry, "base.yaml") 

1403 self.loadData(registry, "datasets.yaml") 

1404 bias = registry.getDatasetType("bias") 

1405 flat = registry.getDatasetType("flat") 

1406 # Obtain expected results from methods other than those we're testing 

1407 # here. That includes: 

1408 # - the dimensions of the data IDs we want to query: 

1409 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1410 # - the dimensions of some other data IDs we'll extract from that: 

1411 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1412 # - the data IDs we expect to obtain from the first queries: 

1413 expectedDataIds = DataCoordinateSet( 

1414 { 

1415 DataCoordinate.standardize( 

1416 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1417 ) 

1418 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1419 }, 

1420 graph=expectedGraph, 

1421 hasFull=False, 

1422 hasRecords=False, 

1423 ) 
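
# (3 detectors x 3 physical_filters = 9 data IDs; detector 4 is excluded

# by the purpose = 'SCIENCE' cut in the query below)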

1424 # - the flat datasets we expect to find from those data IDs, in just 

1425 # one collection (so deduplication is irrelevant): 

1426 expectedFlats = [ 

1427 registry.findDataset( 

1428 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1429 ), 

1430 registry.findDataset( 

1431 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1432 ), 

1433 registry.findDataset( 

1434 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1435 ), 

1436 ] 

1437 # - the data IDs we expect to extract from that: 

1438 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1439 # - the bias datasets we expect to find from those data IDs, after we 

1440 # subset out the physical_filter dimension, first with duplicates: 

1441 expectedAllBiases = [ 

1442 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1443 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1444 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1445 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1446 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1447 ] 

1448 # - ...and without duplicates: 

1449 expectedDeduplicatedBiases = [ 

1450 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1451 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1452 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1453 ] 

1454 # Test against those expected results, using a "lazy" query for the 

1455 # data IDs (which re-executes that query each time we use it to do 

1456 # something new). 

1457 dataIds = registry.queryDataIds( 

1458 ["detector", "physical_filter"], 

1459 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1460 instrument="Cam1", 

1461 ) 

1462 self.assertEqual(dataIds.graph, expectedGraph) 

1463 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1464 self.assertCountEqual( 

1465 list( 

1466 dataIds.findDatasets( 

1467 flat, 

1468 collections=["imported_r"], 

1469 ) 

1470 ), 

1471 expectedFlats, 

1472 ) 

1473 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1474 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1475 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1476 self.assertCountEqual( 

1477 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1478 expectedAllBiases, 

1479 ) 

1480 self.assertCountEqual( 

1481 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1482 expectedDeduplicatedBiases, 

1483 ) 

1484 

1485 # A dataset type whose dimensions do not match the data IDs should raise. 

1486 with self.assertRaises(ValueError): 

1487 subsetDataIds.findDatasets("flat", collections=["imported_r", "imported_g"], findFirst=True) 

1488 

1489 # Use a component dataset type. 

1490 self.assertCountEqual( 

1491 [ 

1492 ref.makeComponentRef("image") 

1493 for ref in subsetDataIds.findDatasets( 

1494 bias, 

1495 collections=["imported_r", "imported_g"], 

1496 findFirst=False, 

1497 ) 

1498 ], 

1499 [ref.makeComponentRef("image") for ref in expectedAllBiases], 

1500 ) 

1501 

1502 # Use a named dataset type that does not exist and a dataset type 

1503 # object that does not exist. 

1504 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure") 

1505 

1506 # Test both string name and dataset type object. 

1507 test_type: Union[str, DatasetType] 

1508 for test_type, test_type_name in ( 

1509 (unknown_type, unknown_type.name), 

1510 (unknown_type.name, unknown_type.name), 

1511 ): 

1512 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name): 

1513 list( 

1514 subsetDataIds.findDatasets( 

1515 test_type, collections=["imported_r", "imported_g"], findFirst=True 

1516 ) 

1517 ) 

1518 

1519 # Materialize the bias dataset queries (only) by putting the results 

1520 # into temporary tables, then repeat those tests. 

1521 with subsetDataIds.findDatasets( 

1522 bias, collections=["imported_r", "imported_g"], findFirst=False 

1523 ).materialize() as biases: 

1524 self.assertCountEqual(list(biases), expectedAllBiases) 

1525 with subsetDataIds.findDatasets( 

1526 bias, collections=["imported_r", "imported_g"], findFirst=True 

1527 ).materialize() as biases: 

1528 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1529 # Materialize the data ID subset query, but not the dataset queries. 

1530 with subsetDataIds.materialize() as subsetDataIds: 

1531 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1532 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1533 self.assertCountEqual( 

1534 list( 

1535 subsetDataIds.findDatasets( 

1536 bias, collections=["imported_r", "imported_g"], findFirst=False 

1537 ) 

1538 ), 

1539 expectedAllBiases, 

1540 ) 

1541 self.assertCountEqual( 

1542 list( 

1543 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1544 ), 

1545 expectedDeduplicatedBiases, 

1546 ) 

1547 # Materialize the dataset queries, too. 

1548 with subsetDataIds.findDatasets( 

1549 bias, collections=["imported_r", "imported_g"], findFirst=False 

1550 ).materialize() as biases: 

1551 self.assertCountEqual(list(biases), expectedAllBiases) 

1552 with subsetDataIds.findDatasets( 

1553 bias, collections=["imported_r", "imported_g"], findFirst=True 

1554 ).materialize() as biases: 

1555 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1556 # Materialize the original query, but none of the follow-up queries. 

1557 with dataIds.materialize() as dataIds: 

1558 self.assertEqual(dataIds.graph, expectedGraph) 

1559 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1560 self.assertCountEqual( 

1561 list( 

1562 dataIds.findDatasets( 

1563 flat, 

1564 collections=["imported_r"], 

1565 ) 

1566 ), 

1567 expectedFlats, 

1568 ) 

1569 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1570 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1571 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1572 self.assertCountEqual( 

1573 list( 

1574 subsetDataIds.findDatasets( 

1575 bias, collections=["imported_r", "imported_g"], findFirst=False 

1576 ) 

1577 ), 

1578 expectedAllBiases, 

1579 ) 

1580 self.assertCountEqual( 

1581 list( 

1582 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1583 ), 

1584 expectedDeduplicatedBiases, 

1585 ) 

1586 # Materialize just the bias dataset queries. 

1587 with subsetDataIds.findDatasets( 

1588 bias, collections=["imported_r", "imported_g"], findFirst=False 

1589 ).materialize() as biases: 

1590 self.assertCountEqual(list(biases), expectedAllBiases) 

1591 with subsetDataIds.findDatasets( 

1592 bias, collections=["imported_r", "imported_g"], findFirst=True 

1593 ).materialize() as biases: 

1594 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1595 # Materialize the subset data ID query, but not the dataset 

1596 # queries. 

1597 with subsetDataIds.materialize() as subsetDataIds: 

1598 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1599 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1600 self.assertCountEqual( 

1601 list( 

1602 subsetDataIds.findDatasets( 

1603 bias, collections=["imported_r", "imported_g"], findFirst=False 

1604 ) 

1605 ), 

1606 expectedAllBiases, 

1607 ) 

1608 self.assertCountEqual( 

1609 list( 

1610 subsetDataIds.findDatasets( 

1611 bias, collections=["imported_r", "imported_g"], findFirst=True 

1612 ) 

1613 ), 

1614 expectedDeduplicatedBiases, 

1615 ) 

1616 # Materialize the bias dataset queries, too, so now we're 

1617 # materializing every single step. 

1618 with subsetDataIds.findDatasets( 

1619 bias, collections=["imported_r", "imported_g"], findFirst=False 

1620 ).materialize() as biases: 

1621 self.assertCountEqual(list(biases), expectedAllBiases) 

1622 with subsetDataIds.findDatasets( 

1623 bias, collections=["imported_r", "imported_g"], findFirst=True 

1624 ).materialize() as biases: 

1625 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1626 

1627 def testStorageClassPropagation(self): 

1628 """Test that queries for datasets respect the storage class passed in 

1629 as part of a full dataset type. 

1630 """ 

1631 registry = self.makeRegistry() 

1632 self.loadData(registry, "base.yaml") 

1633 dataset_type_in_registry = DatasetType( 

1634 "tbl", dimensions=["instrument"], storageClass="DataFrame", universe=registry.dimensions 

1635 ) 

1636 registry.registerDatasetType(dataset_type_in_registry) 

1637 run = "run1" 

1638 registry.registerRun(run) 

1639 (inserted_ref,) = registry.insertDatasets( 

1640 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run 

1641 ) 

1642 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry) 

1643 query_dataset_type = DatasetType( 

1644 "tbl", dimensions=["instrument"], storageClass="ArrowAstropy", universe=registry.dimensions 

1645 ) 

1646 self.assertNotEqual(dataset_type_in_registry, query_dataset_type) 

1647 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run]) 

1648 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore 

1649 (query_datasets_ref,) = query_datasets_result 

1650 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type) 

1651 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets( 

1652 query_dataset_type, collections=[run] 

1653 ) 

1654 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type) 

1655 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result 

1656 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type) 

1657 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type) 

1658 self.assertEqual(list(query_dataset_types_result), [query_dataset_type]) 

1659 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run]) 

1660 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type) 

1661 

1662 def testEmptyDimensionsQueries(self): 

1663 """Test Query and QueryResults objects in the case where there are no 

1664 dimensions. 

1665 """ 

1666 # Set up test data: one dataset type, two runs, one dataset in each. 

1667 registry = self.makeRegistry() 

1668 self.loadData(registry, "base.yaml") 

1669 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1670 registry.registerDatasetType(schema) 

1671 dataId = DataCoordinate.makeEmpty(registry.dimensions) 
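
# The empty data ID is the only data ID valid for a dataset type with no

# dimensions.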

1672 run1 = "run1" 

1673 run2 = "run2" 

1674 registry.registerRun(run1) 

1675 registry.registerRun(run2) 

1676 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1677 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1678 # Query directly for both of the datasets, and each one, one at a time. 

1679 self.checkQueryResults( 

1680 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1681 ) 

1682 self.checkQueryResults( 

1683 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1684 [dataset1], 

1685 ) 

1686 self.checkQueryResults( 

1687 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1688 [dataset2], 

1689 ) 

1690 # Query for data IDs with no dimensions. 

1691 dataIds = registry.queryDataIds([]) 

1692 self.checkQueryResults(dataIds, [dataId]) 

1693 # Use queried data IDs to find the datasets. 

1694 self.checkQueryResults( 

1695 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1696 [dataset1, dataset2], 

1697 ) 

1698 self.checkQueryResults( 

1699 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1700 [dataset1], 

1701 ) 

1702 self.checkQueryResults( 

1703 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1704 [dataset2], 

1705 ) 

1706 # Now materialize the data ID query results and repeat those tests. 

1707 with dataIds.materialize() as dataIds: 

1708 self.checkQueryResults(dataIds, [dataId]) 

1709 self.checkQueryResults( 

1710 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1711 [dataset1], 

1712 ) 

1713 self.checkQueryResults( 

1714 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1715 [dataset2], 

1716 ) 

1717 # Query for non-empty data IDs, then subset that to get the empty one. 

1718 # Repeat the above tests starting from that. 

1719 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1720 self.checkQueryResults(dataIds, [dataId]) 

1721 self.checkQueryResults( 

1722 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1723 [dataset1, dataset2], 

1724 ) 

1725 self.checkQueryResults( 

1726 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1727 [dataset1], 

1728 ) 

1729 self.checkQueryResults( 

1730 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1731 [dataset2], 

1732 ) 

1733 with dataIds.materialize() as dataIds: 

1734 self.checkQueryResults(dataIds, [dataId]) 

1735 self.checkQueryResults( 

1736 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1737 [dataset1, dataset2], 

1738 ) 

1739 self.checkQueryResults( 

1740 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1741 [dataset1], 

1742 ) 

1743 self.checkQueryResults( 

1744 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1745 [dataset2], 

1746 ) 

1747 # Query for non-empty data IDs, then materialize, then subset to get 

1748 # the empty one. Repeat again. 

1749 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1750 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1751 self.checkQueryResults(dataIds, [dataId]) 

1752 self.checkQueryResults( 

1753 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1754 [dataset1, dataset2], 

1755 ) 

1756 self.checkQueryResults( 

1757 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1758 [dataset1], 

1759 ) 

1760 self.checkQueryResults( 

1761 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1762 [dataset2], 

1763 ) 

1764 with dataIds.materialize() as dataIds: 

1765 self.checkQueryResults(dataIds, [dataId]) 

1766 self.checkQueryResults( 

1767 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1768 [dataset1, dataset2], 

1769 ) 

1770 self.checkQueryResults( 

1771 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1772 [dataset1], 

1773 ) 

1774 self.checkQueryResults( 

1775 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1776 [dataset2], 

1777 ) 

1778 # Query for non-empty data IDs with a constraint on an empty-data-ID 

1779 # dataset that exists. 

1780 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...) 

1781 self.checkQueryResults( 

1782 dataIds.subset(unique=True), 

1783 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)], 

1784 ) 

1785 # Again query for non-empty data IDs with a constraint on empty-data-ID 

1786 # datasets, but when the datasets don't exist. We delete the existing 

1787 # dataset and query just that collection rather than creating a new 

1788 # empty collection because this is a bit less likely for our build-time 

1789 # logic to shortcut-out (via the collection summaries), and such a 

1790 # shortcut would make this test a bit more trivial than we'd like. 

1791 registry.removeDatasets([dataset2]) 

1792 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2) 

1793 self.checkQueryResults(dataIds, []) 

1794 

1795 def testDimensionDataModifications(self): 

1796 """Test that modifying dimension records via: 

1797 syncDimensionData(..., update=True) and 

1798 insertDimensionData(..., replace=True) works as expected, even in the 

1799 presence of datasets using those dimensions and spatial overlap 

1800 relationships. 

1801 """ 

1802 

1803 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1804 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1805 for begin, end in ranges: 

1806 yield from range(begin, end) 

1807 

1808 def range_set_hull( 

1809 ranges: lsst.sphgeom.RangeSet, 

1810 pixelization: lsst.sphgeom.HtmPixelization, 

1811 ) -> lsst.sphgeom.ConvexPolygon: 

1812 """Create a ConvexPolygon hull of the region defined by a set of 

1813 HTM pixelization index ranges. 

1814 """ 

1815 points = [] 

1816 for index in unpack_range_set(ranges): 

1817 points.extend(pixelization.triangle(index).getVertices()) 

1818 return lsst.sphgeom.ConvexPolygon(points) 

1819 

1820 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1821 # and four child regions (the trixels within the parent at the next 

1822 # level). We'll use the parent as a tract/visit region and the children 

1823 # as its patch/visit_detector regions. 

1824 registry = self.makeRegistry() 

1825 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1826 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1827 index = 12288 

1828 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 
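
# 12288 is a level-5 HTM trixel index; RangeSet.scaled(4) maps [i, i + 1)

# to [4*i, 4*i + 4), i.e. the indices of its four level-6 children.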

1829 assert htm6.universe().contains(child_ranges_small) 

1830 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)] 

1831 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1832 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1833 ) 

1834 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1835 # Make a larger version of each child region, defined to be the set of 

1836 # htm6 trixels that overlap the original's bounding circle. Make a new 

1837 # parent that's the convex hull of the new children. 

1838 child_regions_large = [ 

1839 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1840 ] 

1841 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small)) 

1842 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1843 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1844 ) 

1845 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1846 assert parent_region_large.contains(parent_region_small) 

1847 assert not parent_region_small.contains(parent_region_large) 

1848 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1849 # Find some commonSkyPix indices that overlap the large regions but do 

1850 # not overlap the small regions. We use commonSkyPix here to make sure the 

1851 # real tests later involve what's in the database, not just post-query 

1852 # filtering of regions. 

1853 child_difference_indices = [] 

1854 for large, small in zip(child_regions_large, child_regions_small): 

1855 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1856 assert difference, "if this is empty, we can't test anything useful with these regions" 

1857 assert all( 

1858 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1859 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1860 for d in difference 

1861 ) 

1862 child_difference_indices.append(difference) 

1863 parent_difference_indices = list( 

1864 unpack_range_set( 

1865 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1866 ) 

1867 ) 

1868 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1869 assert all( 

1870 ( 

1871 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1872 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1873 ) 

1874 for d in parent_difference_indices 

1875 ) 

1876 # Now that we've finally got those regions, we'll insert the large ones 

1877 # as tract/patch dimension records. 

1878 skymap_name = "testing_v1" 

1879 registry.insertDimensionData( 

1880 "skymap", 

1881 { 

1882 "name": skymap_name, 

1883 "hash": bytes([42]), 

1884 "tract_max": 1, 

1885 "patch_nx_max": 2, 

1886 "patch_ny_max": 2, 

1887 }, 

1888 ) 

1889 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

1890 registry.insertDimensionData( 

1891 "patch", 

1892 *[ 

1893 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1894 for n, c in enumerate(child_regions_large) 

1895 ], 

1896 ) 

1897 # Add a dataset that uses these dimensions to make sure that modifying 

1898 # them doesn't disrupt foreign keys (need to make sure DB doesn't 

1899 # implement insert with replace=True as delete-then-insert). 

1900 dataset_type = DatasetType( 

1901 "coadd", 

1902 dimensions=["tract", "patch"], 

1903 universe=registry.dimensions, 

1904 storageClass="Exposure", 

1905 ) 

1906 registry.registerDatasetType(dataset_type) 

1907 registry.registerCollection("the_run", CollectionType.RUN) 

1908 registry.insertDatasets( 

1909 dataset_type, 

1910 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

1911 run="the_run", 

1912 ) 

1913 # Query for tracts and patches that overlap some "difference" 

1914 # commonSkyPix pixels; there should be overlaps, because the database has 

1915 # the "large" suite of regions. 

1916 self.assertEqual( 

1917 {0}, 

1918 { 

1919 data_id["tract"] 

1920 for data_id in registry.queryDataIds( 

1921 ["tract"], 

1922 skymap=skymap_name, 

1923 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1924 ) 

1925 }, 

1926 ) 

1927 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1928 self.assertIn( 

1929 patch_id, 

1930 { 

1931 data_id["patch"] 

1932 for data_id in registry.queryDataIds( 

1933 ["patch"], 

1934 skymap=skymap_name, 

1935 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1936 ) 

1937 }, 

1938 ) 

1939 # Use sync to update the tract region and insert to update the regions 

1940 # of the patches, to the "small" suite. 

1941 updated = registry.syncDimensionData( 

1942 "tract", 

1943 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

1944 update=True, 

1945 ) 

1946 self.assertEqual(updated, {"region": parent_region_large}) 

1947 registry.insertDimensionData( 

1948 "patch", 

1949 *[ 

1950 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1951 for n, c in enumerate(child_regions_small) 

1952 ], 

1953 replace=True, 

1954 ) 

1955 # Query again; there now should be no such overlaps, because the 

1956 # database has the "small" suite of regions. 

1957 self.assertFalse( 

1958 set( 

1959 registry.queryDataIds( 

1960 ["tract"], 

1961 skymap=skymap_name, 

1962 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1963 ) 

1964 ) 

1965 ) 

1966 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1967 self.assertNotIn( 

1968 patch_id, 

1969 { 

1970 data_id["patch"] 

1971 for data_id in registry.queryDataIds( 

1972 ["patch"], 

1973 skymap=skymap_name, 

1974 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1975 ) 

1976 }, 

1977 ) 

1978 # Update back to the large regions and query one more time. 

1979 updated = registry.syncDimensionData( 

1980 "tract", 

1981 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

1982 update=True, 

1983 ) 

1984 self.assertEqual(updated, {"region": parent_region_small}) 

1985 registry.insertDimensionData( 

1986 "patch", 

1987 *[ 

1988 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1989 for n, c in enumerate(child_regions_large) 

1990 ], 

1991 replace=True, 

1992 ) 

1993 self.assertEqual( 

1994 {0}, 

1995 { 

1996 data_id["tract"] 

1997 for data_id in registry.queryDataIds( 

1998 ["tract"], 

1999 skymap=skymap_name, 

2000 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

2001 ) 

2002 }, 

2003 ) 

2004 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

2005 self.assertIn( 

2006 patch_id, 

2007 { 

2008 data_id["patch"] 

2009 for data_id in registry.queryDataIds( 

2010 ["patch"], 

2011 skymap=skymap_name, 

2012 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

2013 ) 

2014 }, 

2015 ) 

2016 

2017 def testCalibrationCollections(self): 

2018 """Test operations on `~CollectionType.CALIBRATION` collections, 

2019 including `Registry.certify`, `Registry.decertify`, and 

2020 `Registry.findDataset`. 

2021 """ 

2022 # Setup - make a Registry, fill it with some datasets in 

2023 # non-calibration collections. 

2024 registry = self.makeRegistry() 

2025 self.loadData(registry, "base.yaml") 

2026 self.loadData(registry, "datasets.yaml") 

2027 # Set up some timestamps. 

2028 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

2029 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

2030 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

2031 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

2032 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

2033 allTimespans = [ 

2034 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

2035 ] 
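
# Because None appears at both ends of the input list, this includes

# half-unbounded timespans like (None, t2) and (t4, None) as well as the

# fully unbounded (None, None).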

2036 # Get references to some datasets. 

2037 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

2038 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

2039 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

2040 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

2041 # Register the main calibration collection we'll be working with. 

2042 collection = "Cam1/calibs/default" 

2043 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

2044 # Cannot associate into a calibration collection (no timespan). 

2045 with self.assertRaises(CollectionTypeError): 

2046 registry.associate(collection, [bias2a]) 

2047 # Certify 2a dataset with [t2, t4) validity. 

2048 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

2049 # Test that we can query for this dataset via the new collection, both 

2050 # on its own and with a RUN collection, as long as we don't try to join 

2051 # in temporal dimensions or use findFirst=True. 

2052 self.assertEqual( 

2053 set(registry.queryDatasets("bias", findFirst=False, collections=collection)), 

2054 {bias2a}, 

2055 ) 

2056 self.assertEqual( 

2057 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])), 

2058 { 

2059 bias2a, 

2060 bias2b, 

2061 bias3b, 

2062 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2063 }, 

2064 ) 

2065 self.assertEqual( 

2066 set(registry.queryDataIds("detector", datasets="bias", collections=collection)), 

2067 {registry.expandDataId(instrument="Cam1", detector=2)}, 

2068 ) 

2069 self.assertEqual( 

2070 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])), 

2071 { 

2072 registry.expandDataId(instrument="Cam1", detector=2), 

2073 registry.expandDataId(instrument="Cam1", detector=3), 

2074 registry.expandDataId(instrument="Cam1", detector=4), 

2075 }, 

2076 ) 

2077 

2078 # We should not be able to certify 2b with anything overlapping that 

2079 # window. 

2080 with self.assertRaises(ConflictingDefinitionError): 

2081 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

2082 with self.assertRaises(ConflictingDefinitionError): 

2083 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

2084 with self.assertRaises(ConflictingDefinitionError): 

2085 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

2086 with self.assertRaises(ConflictingDefinitionError): 

2087 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

2088 with self.assertRaises(ConflictingDefinitionError): 

2089 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

2090 with self.assertRaises(ConflictingDefinitionError): 

2091 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

2092 with self.assertRaises(ConflictingDefinitionError): 

2093 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

2094 with self.assertRaises(ConflictingDefinitionError): 

2095 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

2096 # We should be able to certify 3a with a range overlapping that window, 

2097 # because it's for a different detector. 

2098 # We'll certify 3a over [t1, t3). 

2099 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

2100 # Now we'll certify 2b and 3b together over [t4, ∞). 

2101 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

2102 

2103 # Fetch all associations and check that they are what we expect. 

2104 self.assertCountEqual( 

2105 list( 

2106 registry.queryDatasetAssociations( 

2107 "bias", 

2108 collections=[collection, "imported_g", "imported_r"], 

2109 ) 

2110 ), 

2111 [ 

2112 DatasetAssociation( 

2113 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

2114 collection="imported_g", 

2115 timespan=None, 

2116 ), 

2117 DatasetAssociation( 

2118 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2119 collection="imported_r", 

2120 timespan=None, 

2121 ), 

2122 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

2123 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

2124 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

2125 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

2126 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

2127 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

2128 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2129 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2130 ], 

2131 ) 

2132 

2133 class Ambiguous: 

2134 """Tag class to denote lookups that should be ambiguous.""" 

2135 

2136 pass 

2137 

2138 def assertLookup( 

2139 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]] 

2140 ) -> None: 

2141 """Local function that asserts that a bias lookup returns the given 

2142 expected result. 

2143 """ 

2144 if expected is Ambiguous: 

2145 with self.assertRaises((DatasetTypeError, LookupError)): 

2146 registry.findDataset( 

2147 "bias", 

2148 collections=collection, 

2149 instrument="Cam1", 

2150 detector=detector, 

2151 timespan=timespan, 

2152 ) 

2153 else: 

2154 self.assertEqual( 

2155 expected, 

2156 registry.findDataset( 

2157 "bias", 

2158 collections=collection, 

2159 instrument="Cam1", 

2160 detector=detector, 

2161 timespan=timespan, 

2162 ), 

2163 ) 

2164 

2165 # Systematically test lookups against expected results. 

2166 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2167 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2168 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2169 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2170 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

2171 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2172 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2173 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2174 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2175 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

2176 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2177 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2178 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2179 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

2180 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2181 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

2182 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

2183 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

2184 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

2185 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2186 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2187 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2188 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2189 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2190 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2191 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

2192 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2193 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2194 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2195 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2196 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

2197 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2198 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2199 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2200 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2201 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2202 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2203 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2204 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2205 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2206 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2207 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2208 

2209 # Decertify [t3, t5) for all data IDs, then run the test lookups again. 

2210 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2211 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2212 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2213 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2214 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2215 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2216 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2217 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2218 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2219 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2220 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2221 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2222 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2223 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2224 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2225 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2226 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2227 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2228 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2229 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2230 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2231 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2232 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2233 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2234 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2235 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2236 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2237 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2238 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2239 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2240 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2241 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2242 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2243 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2244 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2245 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2246 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2247 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2248 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2249 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2250 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2251 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2252 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2253 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2254 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2255 

2256 # Decertify everything, this time with explicit data IDs, then check 

2257 # that no lookups succeed. 

2258 registry.decertify( 

2259 collection, 

2260 "bias", 

2261 Timespan(None, None), 

2262 dataIds=[ 

2263 dict(instrument="Cam1", detector=2), 

2264 dict(instrument="Cam1", detector=3), 

2265 ], 

2266 ) 

2267 for detector in (2, 3): 

2268 for timespan in allTimespans: 

2269 assertLookup(detector=detector, timespan=timespan, expected=None) 

2270 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2271 # those. 

2272 registry.certify( 

2273 collection, 

2274 [bias2a, bias3a], 

2275 Timespan(None, None), 

2276 ) 

2277 for timespan in allTimespans: 

2278 assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2279 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2280 # Decertify just bias2 over [t2, t4). 

2281 # This should split a single certification row into two (and leave the 

2282 # other existing row, for bias3a, alone). 

2283 registry.decertify( 

2284 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2285 ) 

2286 for timespan in allTimespans: 

2287 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2288 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2289 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2290 if overlapsBefore and overlapsAfter: 

2291 expected = Ambiguous 

2292 elif overlapsBefore or overlapsAfter: 

2293 expected = bias2a 

2294 else: 

2295 expected = None 

2296 assertLookup(detector=2, timespan=timespan, expected=expected) 

2297 

2298 def testSkipCalibs(self): 

2299 """Test how queries handle skipping of calibration collections.""" 

2300 registry = self.makeRegistry() 

2301 self.loadData(registry, "base.yaml") 

2302 self.loadData(registry, "datasets.yaml") 

2303 

2304 coll_calib = "Cam1/calibs/default" 

2305 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2306 

2307 # Add all biases to the calibration collection. 

2308 # Without this, the logic that prunes dataset subqueries based on 

2309 # datasetType-collection summary information will fire before the logic 

2310 # we want to test below. This is a good thing (it avoids the dreaded 

2311 # NotImplementedError a bit more often) everywhere but here. 

2312 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2313 

2314 coll_list = [coll_calib, "imported_g", "imported_r"] 

2315 chain = "Cam1/chain" 

2316 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2317 registry.setCollectionChain(chain, coll_list) 

2318 

2319 # explicit list will raise if findFirst=True or there are temporal 

2320 # dimensions 

2321 with self.assertRaises(NotImplementedError): 

2322 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2323 with self.assertRaises(NotImplementedError): 

2324 registry.queryDataIds( 

2325 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2326 ).count() 

2327 

2328 # searching via a chained collection skips the calibration collection instead of raising 

2329 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2330 self.assertGreater(len(datasets), 0) 

2331 

2332 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2333 self.assertGreater(len(dataIds), 0) 

2334 

2335 # glob will skip too 

2336 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2337 self.assertGreater(len(datasets), 0) 

2338 

2339 # regular expression will skip too 

2340 pattern = re.compile(".*") 

2341 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2342 self.assertGreater(len(datasets), 0) 

2343 

2344 # ellipsis should work as usual 

2345 datasets = list(registry.queryDatasets("bias", collections=...)) 

2346 self.assertGreater(len(datasets), 0) 

2347 

2348 # few tests with findFirst 

2349 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2350 self.assertGreater(len(datasets), 0) 

2351 

2352 def testIngestTimeQuery(self): 

2353 registry = self.makeRegistry() 

2354 self.loadData(registry, "base.yaml") 

2355 dt0 = datetime.utcnow() 

2356 self.loadData(registry, "datasets.yaml") 

2357 dt1 = datetime.utcnow() 

2358 

2359 datasets = list(registry.queryDatasets(..., collections=...)) 

2360 len0 = len(datasets) 

2361 self.assertGreater(len0, 0) 

2362 

2363 where = "ingest_date > T'2000-01-01'" 

2364 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2365 len1 = len(datasets) 

2366 self.assertEqual(len0, len1) 

2367 

2368 # no one will ever use this piece of software in 30 years 

2369 where = "ingest_date > T'2050-01-01'" 

2370 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2371 len2 = len(datasets) 

2372 self.assertEqual(len2, 0) 

2373 

2374 # Check more exact timing to make sure there is no 37-second offset 

2375 # (after fixing DM-30124). SQLite time precision is 1 second, so make 

2376 # sure that we don't test with higher precision. 

2377 tests = [ 

2378 # format: (timestamp, operator, expected_len) 

2379 (dt0 - timedelta(seconds=1), ">", len0), 

2380 (dt0 - timedelta(seconds=1), "<", 0), 

2381 (dt1 + timedelta(seconds=1), "<", len0), 

2382 (dt1 + timedelta(seconds=1), ">", 0), 

2383 ] 

2384 for dt, op, expect_len in tests: 

2385 dt_str = dt.isoformat(sep=" ") 

2386 

2387 where = f"ingest_date {op} T'{dt_str}'" 

2388 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2389 self.assertEqual(len(datasets), expect_len) 

2390 

2391 # same with bind using datetime or astropy Time 

2392 where = f"ingest_date {op} ingest_time" 

2393 datasets = list( 

2394 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2395 ) 

2396 self.assertEqual(len(datasets), expect_len) 

2397 

2398 dt_astropy = astropy.time.Time(dt, format="datetime") 

2399 datasets = list( 

2400 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2401 ) 

2402 self.assertEqual(len(datasets), expect_len) 

2403 

2404 def testTimespanQueries(self): 

2405 """Test query expressions involving timespans.""" 

2406 registry = self.makeRegistry() 

2407 self.loadData(registry, "hsc-rc2-subset.yaml") 

2408 # All visits in the database; mapping from visit ID to timespan. 

2409 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2410 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2411 # visit IDs are monotonically increasing). 

2412 ids = sorted(visits.keys()) 

2413 self.assertGreater(len(ids), 20) 

2414 # Pick some quasi-random indexes into `ids` to play with. 

2415 i1 = int(len(ids) * 0.1) 

2416 i2 = int(len(ids) * 0.3) 

2417 i3 = int(len(ids) * 0.6) 

2418 i4 = int(len(ids) * 0.8) 

2419 # Extract some times from those: just before the beginning of i1 (which 

2420 # should be after the end of the previous visit), exactly the 

2421 # beginning of i2, just after the beginning of i3 (and before its end), 

2422 # and the exact end of i4. 

2423 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2424 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2425 t2 = visits[ids[i2]].begin 

2426 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2427 self.assertLess(t3, visits[ids[i3]].end) 

2428 t4 = visits[ids[i4]].end 

2429 # Make sure those are actually in order. 

2430 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2431 

2432 bind = { 

2433 "t1": t1, 

2434 "t2": t2, 

2435 "t3": t3, 

2436 "t4": t4, 

2437 "ts23": Timespan(t2, t3), 

2438 } 
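
# These bind values let the where expressions below refer to the times and

# the Timespan by name instead of embedding literals.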

2439 

2440 def query(where): 

2441 """Helper function that queries for visit data IDs and returns 

2442 results as a sorted, deduplicated list of visit IDs. 

2443 """ 

2444 return sorted( 

2445 { 

2446 dataId["visit"] 

2447 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2448 } 

2449 ) 

2450 

2451 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2452 # where they appear in the expression, and how we get the timespan into 

2453 # the expression. 

2454 

2455 # t1 is before the start of i1, so this should not include i1. 

2456 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2457 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2458 # should not include i2. 

2459 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2460 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2461 # t3 is in the middle of i3, so this should include i3. 

2462 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2463 # This one should not include i3, by the same reasoning. 

2464 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2465 # t4 is exactly at the end of i4, so this should include i4. 

2466 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2467 # i4's upper bound of t4 is exclusive, so this should not include i4. 

2468 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2469 

2470 # Now some timespan vs. time scalar queries. 

2471 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2472 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2473 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2474 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2475 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2476 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2477 

2478 # Empty timespans should not overlap anything. 

2479 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 

2480 
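# Illustrative sketch, not one of the tests: binding a `Timespan` value
# and using the OVERLAPS operator, exactly as in the ts23 case above.
# ``registry`` and the astropy-time endpoints ``begin``/``end`` are
# assumed.
def _example_timespan_overlap_query(self, registry, begin, end):
    return list(
        registry.queryDataIds(
            "visit",
            instrument="HSC",
            where="visit.timespan OVERLAPS ts",
            bind={"ts": Timespan(begin, end)},
        )
    )
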

2481 def testCollectionSummaries(self): 

2482 """Test recording and retrieval of collection summaries.""" 

2483 self.maxDiff = None 

2484 registry = self.makeRegistry() 

2485 # Importing datasets from yaml should go through the code path where 

2486 # we update collection summaries as we insert datasets. 

2487 self.loadData(registry, "base.yaml") 

2488 self.loadData(registry, "datasets.yaml") 

2489 flat = registry.getDatasetType("flat") 

2490 expected1 = CollectionSummary() 

2491 expected1.dataset_types.add(registry.getDatasetType("bias")) 

2492 expected1.add_data_ids( 

2493 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)] 

2494 ) 

2495 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2496 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2497 # Create a chained collection with both of the imported runs; the 

2498 # summary should be the same, because it's a union with itself. 

2499 chain = "chain" 

2500 registry.registerCollection(chain, CollectionType.CHAINED) 

2501 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2502 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2503 # Associate flats only into a tagged collection and a calibration 

2504 # collection to check summaries of those. 

2505 tag = "tag" 

2506 registry.registerCollection(tag, CollectionType.TAGGED) 

2507 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2508 calibs = "calibs" 

2509 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2510 registry.certify( 

2511 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2512 ) 

2513 expected2 = expected1.copy() 

2514 expected2.dataset_types.discard("bias") 

2515 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2516 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2517 # Explicitly calling Registry.refresh() should load those same 

2518 # summaries, via a totally different code path. 

2519 registry.refresh() 

2520 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2521 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2522 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2523 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2524 
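# Illustrative sketch, not one of the tests: a caller can use a summary
# to learn which dataset types a collection may contain without running
# a full dataset query. Assumes ``summary.dataset_types`` iterates over
# `DatasetType` instances, as the assertions above suggest.
def _example_summary_dataset_types(self, registry):
    summary = registry.getCollectionSummary("imported_g")
    return {dataset_type.name for dataset_type in summary.dataset_types}
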

2525 def testBindInQueryDatasets(self): 

2526 """Test that the bind parameter is correctly forwarded in 

2527 queryDatasets recursion. 

2528 """ 

2529 registry = self.makeRegistry() 

2530 # Import some datasets from yaml to give the queries below something 

2531 # to match. 

2532 self.loadData(registry, "base.yaml") 

2533 self.loadData(registry, "datasets.yaml") 

2534 self.assertEqual( 

2535 set(registry.queryDatasets("flat", band="r", collections=...)), 

2536 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2537 ) 

2538 

2539 def testQueryIntRangeExpressions(self): 

2540 """Test integer range expressions in ``where`` arguments. 

2541 

2542 Note that our expressions use inclusive stop values, unlike Python's. 

2543 """ 

2544 registry = self.makeRegistry() 

2545 self.loadData(registry, "base.yaml") 

2546 self.assertEqual( 

2547 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")), 

2548 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]}, 

2549 ) 

2550 self.assertEqual( 

2551 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")), 

2552 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]}, 

2553 ) 

2554 self.assertEqual( 

2555 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")), 

2556 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]}, 

2557 ) 

2558 
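# Illustrative sketch, not one of the tests: the start..stop:stride
# syntax uses an inclusive stop, so "detector IN (1..4:2)" matches
# detectors 1 and 3; the Python equivalent is range(1, 5, 2).
def _example_int_range_query(self, registry):
    return set(
        registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")
    )
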

2559 def testQueryResultSummaries(self): 

2560 """Test summary methods like `count`, `any`, and `explain_no_results` 

2561 on `DataCoordinateQueryResults` and `DatasetQueryResults`. 

2562 """ 

2563 registry = self.makeRegistry() 

2564 self.loadData(registry, "base.yaml") 

2565 self.loadData(registry, "datasets.yaml") 

2566 self.loadData(registry, "spatial.yaml") 

2567 # Default test dataset has two collections, each with both flats and 

2568 # biases. Add a new collection with only biases. 

2569 registry.registerCollection("biases", CollectionType.TAGGED) 

2570 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2571 # First query yields two results, and involves no postprocessing. 

2572 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2573 self.assertTrue(query1.any(execute=False, exact=False)) 

2574 self.assertTrue(query1.any(execute=True, exact=False)) 

2575 self.assertTrue(query1.any(execute=True, exact=True)) 

2576 self.assertEqual(query1.count(exact=False), 2) 

2577 self.assertEqual(query1.count(exact=True), 2) 

2578 self.assertFalse(list(query1.explain_no_results())) 

2579 # Second query should yield no results, which we should see when 

2580 # we attempt to expand the data ID. 

2581 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2582 # There's no execute=False, exact=False test here because the behavior 

2583 # is not something we want to guarantee in this case (and exact=False 

2584 # says either answer is legal). 

2585 self.assertFalse(query2.any(execute=True, exact=False)) 

2586 self.assertFalse(query2.any(execute=True, exact=True)) 

2587 self.assertEqual(query2.count(exact=False), 0) 

2588 self.assertEqual(query2.count(exact=True), 0) 

2589 self.assertTrue(list(query2.explain_no_results())) 

2590 # These queries yield no results due to various problems that can be 

2591 # spotted prior to execution, yielding helpful diagnostics. 

2592 base_query = registry.queryDataIds(["detector", "physical_filter"]) 

2593 queries_and_snippets = [ 

2594 ( 

2595 # Dataset type name doesn't match any existing dataset types. 

2596 registry.queryDatasets("nonexistent", collections=...), 

2597 ["nonexistent"], 

2598 ), 

2599 ( 

2600 # Dataset type object isn't registered. 

2601 registry.queryDatasets( 

2602 DatasetType( 

2603 "nonexistent", 

2604 dimensions=["instrument"], 

2605 universe=registry.dimensions, 

2606 storageClass="Image", 

2607 ), 

2608 collections=..., 

2609 ), 

2610 ["nonexistent"], 

2611 ), 

2612 ( 

2613 # No datasets of this type in this collection. 

2614 registry.queryDatasets("flat", collections=["biases"]), 

2615 ["flat", "biases"], 

2616 ), 

2617 ( 

2618 # No datasets of this type in this collection. 

2619 base_query.findDatasets("flat", collections=["biases"]), 

2620 ["flat", "biases"], 

2621 ), 

2622 ( 

2623 # No collections matching at all. 

2624 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2625 ["potato"], 

2626 ), 

2627 ] 

2628 # The behavior of these additional queries is slated to change in the 

2629 # future, so we also check for deprecation warnings. 

2630 with self.assertWarns(FutureWarning): 

2631 queries_and_snippets.append( 

2632 ( 

2633 # Dataset type name doesn't match any existing dataset 

2634 # types. 

2635 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...), 

2636 ["nonexistent"], 

2637 ) 

2638 ) 

2639 with self.assertWarns(FutureWarning): 

2640 queries_and_snippets.append( 

2641 ( 

2642 # Dataset type name doesn't match any existing dataset 

2643 # types. 

2644 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...), 

2645 ["nonexistent"], 

2646 ) 

2647 ) 

2648 for query, snippets in queries_and_snippets: 

2649 self.assertFalse(query.any(execute=False, exact=False)) 

2650 self.assertFalse(query.any(execute=True, exact=False)) 

2651 self.assertFalse(query.any(execute=True, exact=True)) 

2652 self.assertEqual(query.count(exact=False), 0) 

2653 self.assertEqual(query.count(exact=True), 0) 

2654 messages = list(query.explain_no_results()) 

2655 self.assertTrue(messages) 

2656 # Want all expected snippets to appear in at least one message. 

2657 self.assertTrue( 

2658 any( 

2659 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2660 ), 

2661 messages, 

2662 ) 

2663 

2664 # This query does yield results, but should also emit a warning because 

2665 # passing dataset type patterns to queryDataIds is deprecated; just 

2666 # look for the warning. 

2667 with self.assertWarns(FutureWarning): 

2668 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...) 

2669 

2670 # These queries yield no results due to problems that can be identified 

2671 # by cheap follow-up queries, yielding helpful diagnostics. 

2672 for query, snippets in [ 

2673 ( 

2674 # No records for one of the involved dimensions. 

2675 registry.queryDataIds(["subfilter"]), 

2676 ["no rows", "subfilter"], 

2677 ), 

2678 ( 

2679 # No records for one of the involved dimensions. 

2680 registry.queryDimensionRecords("subfilter"), 

2681 ["no rows", "subfilter"], 

2682 ), 

2683 ]: 

2684 self.assertFalse(query.any(execute=True, exact=False)) 

2685 self.assertFalse(query.any(execute=True, exact=True)) 

2686 self.assertEqual(query.count(exact=True), 0) 

2687 messages = list(query.explain_no_results()) 

2688 self.assertTrue(messages) 

2689 # Want all expected snippets to appear in at least one message. 

2690 self.assertTrue( 

2691 any( 

2692 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2693 ), 

2694 messages, 

2695 ) 

2696 

2697 # This query yields four overlaps in the database, but one is filtered 

2698 # out in postprocessing. The count queries aren't accurate because 

2699 # they don't account for duplication that happens due to an internal 

2700 # join against commonSkyPix. 

2701 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2702 self.assertEqual( 

2703 { 

2704 DataCoordinate.standardize( 

2705 instrument="Cam1", 

2706 skymap="SkyMap1", 

2707 visit=v, 

2708 tract=t, 

2709 universe=registry.dimensions, 

2710 ) 

2711 for v, t in [(1, 0), (2, 0), (2, 1)] 

2712 }, 

2713 set(query3), 

2714 ) 

2715 self.assertTrue(query3.any(execute=False, exact=False)) 

2716 self.assertTrue(query3.any(execute=True, exact=False)) 

2717 self.assertTrue(query3.any(execute=True, exact=True)) 

2718 self.assertGreaterEqual(query3.count(exact=False), 4) 

2719 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3) 

2720 self.assertFalse(list(query3.explain_no_results())) 

2721 # This query yields overlaps in the database, but all are filtered 

2722 # out in postprocessing. The count queries again aren't very useful. 

2723 # We have to use `where=` here to avoid an optimization that 

2724 # (currently) skips the spatial postprocess-filtering because it 

2725 # recognizes that no spatial join is necessary. That's not ideal, but 

2726 # fixing it is out of scope for this ticket. 

2727 query4 = registry.queryDataIds( 

2728 ["visit", "tract"], 

2729 instrument="Cam1", 

2730 skymap="SkyMap1", 

2731 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2732 ) 

2733 self.assertFalse(set(query4)) 

2734 self.assertTrue(query4.any(execute=False, exact=False)) 

2735 self.assertTrue(query4.any(execute=True, exact=False)) 

2736 self.assertFalse(query4.any(execute=True, exact=True)) 

2737 self.assertGreaterEqual(query4.count(exact=False), 1) 

2738 self.assertEqual(query4.count(exact=True, discard=True), 0) 

2739 messages = query4.explain_no_results() 

2740 self.assertTrue(messages) 

2741 self.assertTrue(any("overlap" in message for message in messages)) 

2742 # This query should yield results from one dataset type but not the 

2743 # other, which is not registered. 

2744 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"]) 

2745 self.assertTrue(set(query5)) 

2746 self.assertTrue(query5.any(execute=False, exact=False)) 

2747 self.assertTrue(query5.any(execute=True, exact=False)) 

2748 self.assertTrue(query5.any(execute=True, exact=True)) 

2749 self.assertGreaterEqual(query5.count(exact=False), 1) 

2750 self.assertGreaterEqual(query5.count(exact=True), 1) 

2751 self.assertFalse(list(query5.explain_no_results())) 

2752 # This query applies a selection that yields no results, fully in the 

2753 # database. Explaining why it fails involves traversing the relation 

2754 # tree and running a LIMIT 1 query at each level that has the potential 

2755 # to remove rows. 

2756 query6 = registry.queryDimensionRecords( 

2757 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1" 

2758 ) 

2759 self.assertEqual(query6.count(exact=True), 0) 

2760 messages = query6.explain_no_results() 

2761 self.assertTrue(messages) 

2762 self.assertTrue(any("no-purpose" in message for message in messages)) 

2763 
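# Illustrative sketch, not one of the tests: a cheap-to-expensive
# diagnostic ladder built from the summary methods exercised above.
# ``query`` is assumed to be any query-results object with these
# methods.
def _example_diagnose_empty_query(self, query):
    if not query.any(execute=False, exact=False):
        # Doomed before execution; diagnostics are essentially free.
        return list(query.explain_no_results())
    if not query.any(execute=True, exact=True):
        # Empty only after execution and any postprocessing.
        return list(query.explain_no_results())
    # Not empty; nothing to explain.
    return []
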

2764 def testQueryDataIdsOrderBy(self): 

2765 """Test order_by and limit on result returned by queryDataIds().""" 

2766 registry = self.makeRegistry() 

2767 self.loadData(registry, "base.yaml") 

2768 self.loadData(registry, "datasets.yaml") 

2769 self.loadData(registry, "spatial.yaml") 

2770 

2771 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

2772 return registry.queryDataIds( 

2773 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

2774 ) 

2775 

2776 Test = namedtuple( 

2777 "testQueryDataIdsOrderByTest", 

2778 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

2779 defaults=(None, None, None), 

2780 ) 

2781 

2782 test_data = ( 

2783 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2784 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

2785 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

2786 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

2787 Test( 

2788 "tract.id,visit.id", 

2789 "tract,visit", 

2790 ((0, 1), (0, 1), (0, 2)), 

2791 limit=(3,), 

2792 ), 

2793 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

2794 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

2795 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

2796 Test( 

2797 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

2798 ), 

2799 Test( 

2800 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

2801 ), 

2802 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2803 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2804 Test( 

2805 "tract,-timespan.begin,timespan.end", 

2806 "tract,visit", 

2807 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

2808 ), 

2809 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

2810 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

2811 Test( 

2812 "tract,detector", 

2813 "tract,detector", 

2814 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2815 datasets="flat", 

2816 collections="imported_r", 

2817 ), 

2818 Test( 

2819 "tract,detector.full_name", 

2820 "tract,detector", 

2821 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2822 datasets="flat", 

2823 collections="imported_r", 

2824 ), 

2825 Test( 

2826 "tract,detector.raft,detector.name_in_raft", 

2827 "tract,detector", 

2828 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2829 datasets="flat", 

2830 collections="imported_r", 

2831 ), 

2832 ) 

2833 

2834 for test in test_data: 

2835 order_by = test.order_by.split(",") 

2836 keys = test.keys.split(",") 

2837 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

2838 if test.limit is not None: 

2839 query = query.limit(*test.limit) 

2840 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

2841 self.assertEqual(dataIds, test.result) 

2842 

2843 # and materialize 

2844 query = do_query(keys).order_by(*order_by) 

2845 if test.limit is not None: 

2846 query = query.limit(*test.limit) 

2847 with self.assertRaises(RelationalAlgebraError): 

2848 with query.materialize(): 

2849 pass 

2850 

2851 # errors in a name 

2852 for order_by in ("", "-"): 

2853 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2854 list(do_query().order_by(order_by)) 

2855 

2856 for order_by in ("undimension.name", "-undimension.name"): 

2857 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"): 

2858 list(do_query().order_by(order_by)) 

2859 

2860 for order_by in ("attract", "-attract"): 

2861 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

2862 list(do_query().order_by(order_by)) 

2863 

2864 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

2865 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

2866 

2867 with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimension"): 

2868 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

2869 

2870 with self.assertRaisesRegex( 

2871 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

2872 ): 

2873 list(do_query("tract").order_by("timespan.begin")) 

2874 

2875 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

2876 list(do_query("tract").order_by("tract.timespan.begin")) 

2877 

2878 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

2879 list(do_query("tract").order_by("tract.name")) 

2880 
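# Illustrative sketch, not one of the tests: a "-" prefix requests
# descending order, dotted names select dimension metadata fields, and
# limit(limit, offset) slices the sorted results; the values here are
# arbitrary.
def _example_ordered_data_ids(self, registry):
    query = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
    return list(query.order_by("tract", "-visit.exposure_time").limit(3, 3))
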

2881 def testQueryDataIdsGovernorExceptions(self): 

2882 """Test exceptions raised by queryDataIds() for incorrect governors.""" 

2883 registry = self.makeRegistry() 

2884 self.loadData(registry, "base.yaml") 

2885 self.loadData(registry, "datasets.yaml") 

2886 self.loadData(registry, "spatial.yaml") 

2887 

2888 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs): 

2889 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs) 

2890 

2891 Test = namedtuple( 

2892 "testQueryDataIdExceptionsTest", 

2893 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"), 

2894 defaults=(None, None, None, {}, None, 0), 

2895 ) 

2896 

2897 test_data = ( 

2898 Test("tract,visit", count=6), 

2899 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2900 Test( 

2901 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError 

2902 ), 

2903 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2904 Test( 

2905 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError 

2906 ), 

2907 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6), 

2908 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError), 

2909 Test( 

2910 "tract,visit", 

2911 where="instrument=cam AND skymap=map", 

2912 bind={"cam": "Cam1", "map": "SkyMap1"}, 

2913 count=6, 

2914 ), 

2915 Test( 

2916 "tract,visit", 

2917 where="instrument=cam AND skymap=map", 

2918 bind={"cam": "Cam", "map": "SkyMap"}, 

2919 exception=DataIdValueError, 

2920 ), 

2921 ) 

2922 

2923 for test in test_data: 

2924 dimensions = test.dimensions.split(",") 

2925 if test.exception: 

2926 with self.assertRaises(test.exception): 

2927 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() 

2928 else: 

2929 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2930 self.assertEqual(query.count(discard=True), test.count) 

2931 

2932 # and materialize 

2933 if test.exception: 

2934 with self.assertRaises(test.exception): 

2935 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2936 with query.materialize() as materialized: 

2937 materialized.count(discard=True) 

2938 else: 

2939 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2940 with query.materialize() as materialized: 

2941 self.assertEqual(materialized.count(discard=True), test.count) 

2942 
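# Illustrative sketch, not one of the tests: unknown governor values
# (e.g. a misspelled instrument) surface as `DataIdValueError`. The
# tests above wrap both query construction and consumption, so a
# defensive caller should do the same.
def _example_catch_bad_governor(self, registry):
    try:
        query = registry.queryDataIds(["tract", "visit"], instrument="Cam2", skymap="SkyMap1")
        return query.count(discard=True)
    except DataIdValueError:
        return 0
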

2943 def testQueryDimensionRecordsOrderBy(self): 

2944 """Test order_by and limit on result returned by 

2945 queryDimensionRecords(). 

2946 """ 

2947 registry = self.makeRegistry() 

2948 self.loadData(registry, "base.yaml") 

2949 self.loadData(registry, "datasets.yaml") 

2950 self.loadData(registry, "spatial.yaml") 

2951 

2952 def do_query(element, datasets=None, collections=None): 

2953 return registry.queryDimensionRecords( 

2954 element, instrument="Cam1", datasets=datasets, collections=collections 

2955 ) 

2956 

2957 query = do_query("detector") 

2958 self.assertEqual(len(list(query)), 4) 

2959 

2960 Test = namedtuple( 

2961 "testQueryDataIdsOrderByTest", 

2962 ("element", "order_by", "result", "limit", "datasets", "collections"), 

2963 defaults=(None, None, None), 

2964 ) 

2965 

2966 test_data = ( 

2967 Test("detector", "detector", (1, 2, 3, 4)), 

2968 Test("detector", "-detector", (4, 3, 2, 1)), 

2969 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)), 

2970 Test("detector", "-detector.purpose", (4,), limit=(1,)), 

2971 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)), 

2972 Test("visit", "visit", (1, 2)), 

2973 Test("visit", "-visit.id", (2, 1)), 

2974 Test("visit", "zenith_angle", (1, 2)), 

2975 Test("visit", "-visit.name", (2, 1)), 

2976 Test("visit", "day_obs,-timespan.begin", (2, 1)), 

2977 ) 

2978 

2979 for test in test_data: 

2980 order_by = test.order_by.split(",") 

2981 query = do_query(test.element).order_by(*order_by) 

2982 if test.limit is not None: 

2983 query = query.limit(*test.limit) 

2984 dataIds = tuple(rec.id for rec in query) 

2985 self.assertEqual(dataIds, test.result) 

2986 

2987 # errors in a name 

2988 for order_by in ("", "-"): 

2989 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2990 list(do_query("detector").order_by(order_by)) 

2991 

2992 for order_by in ("undimension.name", "-undimension.name"): 

2993 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"): 

2994 list(do_query("detector").order_by(order_by)) 

2995 

2996 for order_by in ("attract", "-attract"): 

2997 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."): 

2998 list(do_query("detector").order_by(order_by)) 

2999 

3000 def testQueryDimensionRecordsExceptions(self): 

3001 """Test exceptions raised by queryDimensionRecords().""" 

3002 registry = self.makeRegistry() 

3003 self.loadData(registry, "base.yaml") 

3004 self.loadData(registry, "datasets.yaml") 

3005 self.loadData(registry, "spatial.yaml") 

3006 

3007 result = registry.queryDimensionRecords("detector") 

3008 self.assertEqual(result.count(), 4) 

3009 result = registry.queryDimensionRecords("detector", instrument="Cam1") 

3010 self.assertEqual(result.count(), 4) 

3011 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"}) 

3012 self.assertEqual(result.count(), 4) 

3013 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'") 

3014 self.assertEqual(result.count(), 4) 

3015 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"}) 

3016 self.assertEqual(result.count(), 4) 

3017 

3018 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3019 result = registry.queryDimensionRecords("detector", instrument="NotCam1") 

3020 result.count() 

3021 

3022 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

3023 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) 

3024 result.count() 

3025 

3026 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3027 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") 

3028 result.count() 

3029 

3030 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

3031 result = registry.queryDimensionRecords( 

3032 "detector", where="instrument=instr", bind={"instr": "NotCam1"} 

3033 ) 

3034 result.count() 

3035 

3036 def testDatasetConstrainedDimensionRecordQueries(self): 

3037 """Test that queryDimensionRecords works even when given a dataset 

3038 constraint whose dimensions extend beyond the requested dimension 

3039 element's. 

3040 """ 

3041 registry = self.makeRegistry() 

3042 self.loadData(registry, "base.yaml") 

3043 self.loadData(registry, "datasets.yaml") 

3044 # Query for physical_filter dimension records, using a dataset type 

3045 # whose dimensions include physical_filter as well as detector. 

3046 records = registry.queryDimensionRecords( 

3047 "physical_filter", 

3048 datasets=["flat"], 

3049 collections="imported_r", 

3050 ) 

3051 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"}) 

3052 # Trying to constrain by all dataset types is an error. 

3053 with self.assertRaises(TypeError): 

3054 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r")) 

3055 

3056 def testSkyPixDatasetQueries(self): 

3057 """Test that we can build queries involving skypix dimensions as long 

3058 as a dataset type that uses those dimensions is included. 

3059 """ 

3060 registry = self.makeRegistry() 

3061 self.loadData(registry, "base.yaml") 

3062 dataset_type = DatasetType( 

3063 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int" 

3064 ) 

3065 registry.registerDatasetType(dataset_type) 

3066 run = "r" 

3067 registry.registerRun(run) 

3068 # First try queries where there are no datasets; the concern is whether 

3069 # we can even build and execute these queries without raising, even 

3070 # when "doomed" query shortcuts are in play. 

3071 self.assertFalse( 

3072 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)) 

3073 ) 

3074 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run))) 

3075 # Now add a dataset and see that we can get it back. 

3076 htm7 = registry.dimensions.skypix["htm"][7].pixelization 

3077 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0]) 

3078 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run) 

3079 self.assertEqual( 

3080 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)), 

3081 {data_id}, 

3082 ) 

3083 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref}) 

3084 
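# Illustrative sketch, not one of the tests: picking a valid htm7 pixel
# ID from the pixelization's universe (a range set of ID intervals) to
# build a skypix data ID, as done above.
def _example_first_htm7_data_id(self, registry):
    htm7 = registry.dimensions.skypix["htm"][7].pixelization
    begin, _end = htm7.universe()[0]
    return registry.expandDataId(instrument="Cam1", htm7=begin)
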

3085 def testDatasetIdFactory(self): 

3086 """Simple test for DatasetIdFactory, mostly to catch potential changes 

3087 in its API. 

3088 """ 

3089 registry = self.makeRegistry() 

3090 factory = registry.datasetIdFactory 

3091 dataset_type = DatasetType( 

3092 "datasetType", 

3093 dimensions=["detector", "instrument"], 

3094 universe=registry.dimensions, 

3095 storageClass="int", 

3096 ) 

3097 run = "run" 

3098 data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions) 

3099 

3100 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE) 

3101 self.assertIsInstance(datasetId, uuid.UUID) 

3102 self.assertEqual(datasetId.version, 4) 

3103 

3104 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE) 

3105 self.assertIsInstance(datasetId, uuid.UUID) 

3106 self.assertEqual(datasetId.version, 5) 

3107 

3108 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN) 

3109 self.assertIsInstance(datasetId, uuid.UUID) 

3110 self.assertEqual(datasetId.version, 5) 

3111 
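# Illustrative sketch, not one of the tests: UNIQUE yields a random
# version-4 UUID on every call, while the DATAID_TYPE* modes yield
# deterministic version-5 UUIDs, so repeating a call with identical
# inputs should reproduce the same ID.
def _example_dataset_ids(self, factory, run, dataset_type, data_id):
    unique_id = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
    repeatable_id = factory.makeDatasetId(
        run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN
    )
    return unique_id, repeatable_id
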

3112 def testExposureQueries(self): 

3113 """Test query methods using arguments sourced from the exposure log 

3114 service. 

3115 

3116 The most complete test dataset currently available to daf_butler tests 

3117 is the hsc-rc2-subset.yaml export (which is unfortunately distinct from 

3118 the lsst/rc2_subset GitHub repo), but that does not have 'exposure' 

3119 dimension records as it was focused on providing nontrivial spatial 

3120 overlaps between visit+detector and tract+patch. So in this test we 

3121 need to translate queries that originally used the exposure dimension 

3122 to use the (very similar) visit dimension instead. 

3123 """ 

3124 registry = self.makeRegistry() 

3125 self.loadData(registry, "hsc-rc2-subset.yaml") 

3126 self.assertEqual( 

3127 [ 

3128 record.id 

3129 for record in registry.queryDimensionRecords("visit", instrument="HSC") 

3130 .order_by("id") 

3131 .limit(5) 

3132 ], 

3133 [318, 322, 326, 330, 332], 

3134 ) 

3135 self.assertEqual( 

3136 [ 

3137 data_id["visit"] 

3138 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("id").limit(5) 

3139 ], 

3140 [318, 322, 326, 330, 332], 

3141 ) 

3142 self.assertEqual( 

3143 [ 

3144 record.id 

3145 for record in registry.queryDimensionRecords("detector", instrument="HSC") 

3146 .order_by("full_name") 

3147 .limit(5) 

3148 ], 

3149 [73, 72, 71, 70, 65], 

3150 ) 

3151 self.assertEqual( 

3152 [ 

3153 data_id["detector"] 

3154 for data_id in registry.queryDataIds(["detector"], instrument="HSC") 

3155 .order_by("full_name") 

3156 .limit(5) 

3157 ], 

3158 [73, 72, 71, 70, 65], 

3159 ) 

3160 

3161 def test_long_query_names(self) -> None: 

3162 """Test that queries involving very long names are handled correctly. 

3163 

3164 This is especially important for PostgreSQL, which truncates symbols 

3165 longer than 64 chars, but it's worth testing for all DBs. 

3166 """ 

3167 registry = self.makeRegistry() 

3168 name = "abcd" * 17 

3169 registry.registerDatasetType( 

3170 DatasetType( 

3171 name, 

3172 dimensions=(), 

3173 storageClass="Exposure", 

3174 universe=registry.dimensions, 

3175 ) 

3176 ) 

3177 # Need to search more than one collection actually containing a 

3178 # matching dataset to avoid optimizations that sidestep bugs due to 

3179 # truncation by making findFirst=True a no-op. 

3180 run1 = "run1" 

3181 registry.registerRun(run1) 

3182 run2 = "run2" 

3183 registry.registerRun(run2) 

3184 (ref1,) = registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run1) 

3185 registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run2) 

3186 self.assertEqual( 

3187 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)), 

3188 {ref1}, 

3189 ) 

3190 

3191 def test_skypix_constraint_queries(self) -> None: 

3192 """Test queries spatially constrained by a skypix data ID.""" 

3193 registry = self.makeRegistry() 

3194 self.loadData(registry, "hsc-rc2-subset.yaml") 

3195 patch_regions = { 

3196 (data_id["tract"], data_id["patch"]): data_id.region 

3197 for data_id in registry.queryDataIds(["patch"]).expanded() 

3198 } 

3199 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"] 

3200 # This check ensures the test doesn't become trivial due to a config 

3201 # change; if it does, just pick a different HTM level. 

3202 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix) 

3203 # Gather all skypix IDs that definitely overlap at least one of these 

3204 # patches. 

3205 relevant_skypix_ids = lsst.sphgeom.RangeSet() 

3206 for patch_region in patch_regions.values(): 

3207 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region) 

3208 # Look for a "nontrivial" skypix_id that overlaps at least one patch 

3209 # and does not overlap at least one other patch. 

3210 for skypix_id in itertools.chain.from_iterable( 

3211 range(begin, end) for begin, end in relevant_skypix_ids 

3212 ): 

3213 skypix_region = skypix_dimension.pixelization.pixel(skypix_id) 

3214 overlapping_patches = { 

3215 patch_key 

3216 for patch_key, patch_region in patch_regions.items() 

3217 if not patch_region.isDisjointFrom(skypix_region) 

3218 } 

3219 if overlapping_patches and overlapping_patches != patch_regions.keys(): 

3220 break 

3221 else: 

3222 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.") 

3223 self.assertEqual( 

3224 { 

3225 (data_id["tract"], data_id["patch"]) 

3226 for data_id in registry.queryDataIds( 

3227 ["patch"], 

3228 dataId={skypix_dimension.name: skypix_id}, 

3229 ) 

3230 }, 

3231 overlapping_patches, 

3232 ) 

3233 
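# Illustrative sketch, not one of the tests: flattening an
# `lsst.sphgeom.RangeSet` of [begin, end) ID ranges into individual
# pixel IDs, equivalent to the itertools.chain.from_iterable call above.
@staticmethod
def _example_iter_skypix_ids(range_set):
    for begin, end in range_set:
        yield from range(begin, end)
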

3234 def test_spatial_constraint_queries(self) -> None: 

3235 """Test queries in which one spatial dimension in the constraint (data 

3236 ID or ``where`` string) constrains a different spatial dimension in the 

3237 query result columns. 

3238 """ 

3239 registry = self.makeRegistry() 

3240 self.loadData(registry, "hsc-rc2-subset.yaml") 

3241 patch_regions = { 

3242 (data_id["tract"], data_id["patch"]): data_id.region 

3243 for data_id in registry.queryDataIds(["patch"]).expanded() 

3244 } 

3245 observation_regions = { 

3246 (data_id["visit"], data_id["detector"]): data_id.region 

3247 for data_id in registry.queryDataIds(["visit", "detector"]).expanded() 

3248 } 

3249 all_combos = { 

3250 (patch_key, observation_key) 

3251 for patch_key, observation_key in itertools.product(patch_regions, observation_regions) 

3252 } 

3253 overlapping_combos = { 

3254 (patch_key, observation_key) 

3255 for patch_key, observation_key in all_combos 

3256 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key]) 

3257 } 

3258 # Check a direct spatial join with no constraint first. 

3259 self.assertEqual( 

3260 { 

3261 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"])) 

3262 for data_id in registry.queryDataIds(["patch", "visit", "detector"]) 

3263 }, 

3264 overlapping_combos, 

3265 ) 

3266 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set) 

3267 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set) 

3268 for patch_key, observation_key in overlapping_combos: 

3269 overlaps_by_patch[patch_key].add(observation_key) 

3270 overlaps_by_observation[observation_key].add(patch_key) 

3271 # Find patches and observations that overlap at least one of the other 

3272 # but not all of the other. 

3273 nontrivial_patch = next( 

3274 iter( 

3275 patch_key 

3276 for patch_key, observation_keys in overlaps_by_patch.items() 

3277 if observation_keys and observation_keys != observation_regions.keys() 

3278 ) 

3279 ) 

3280 nontrivial_observation = next( 

3281 iter( 

3282 observation_key 

3283 for observation_key, patch_keys in overlaps_by_observation.items() 

3284 if patch_keys and patch_keys != patch_regions.keys() 

3285 ) 

3286 ) 

3287 # Use the nontrivial patches and observations as constraints on the 

3288 # other dimensions in various ways, first via a 'where' expression. 

3289 # It's better in general to use 'bind' instead of f-strings, but these 

3290 # are all integers so there are no quoting concerns. 

3291 self.assertEqual( 

3292 { 

3293 (data_id["visit"], data_id["detector"]) 

3294 for data_id in registry.queryDataIds( 

3295 ["visit", "detector"], 

3296 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}", 

3297 skymap="hsc_rings_v1", 

3298 ) 

3299 }, 

3300 overlaps_by_patch[nontrivial_patch], 

3301 ) 

3302 self.assertEqual( 

3303 { 

3304 (data_id["tract"], data_id["patch"]) 

3305 for data_id in registry.queryDataIds( 

3306 ["patch"], 

3307 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}", 

3308 instrument="HSC", 

3309 ) 

3310 }, 

3311 overlaps_by_observation[nontrivial_observation], 

3312 ) 

3313 # and then via the dataId argument. 

3314 self.assertEqual( 

3315 { 

3316 (data_id["visit"], data_id["detector"]) 

3317 for data_id in registry.queryDataIds( 

3318 ["visit", "detector"], 

3319 dataId={ 

3320 "tract": nontrivial_patch[0], 

3321 "patch": nontrivial_patch[1], 

3322 }, 

3323 skymap="hsc_rings_v1", 

3324 ) 

3325 }, 

3326 overlaps_by_patch[nontrivial_patch], 

3327 ) 

3328 self.assertEqual( 

3329 { 

3330 (data_id["tract"], data_id["patch"]) 

3331 for data_id in registry.queryDataIds( 

3332 ["patch"], 

3333 dataId={ 

3334 "visit": nontrivial_observation[0], 

3335 "detector": nontrivial_observation[1], 

3336 }, 

3337 instrument="HSC", 

3338 ) 

3339 }, 

3340 overlaps_by_observation[nontrivial_observation], 

3341 ) 

3342 

3343 def test_query_projection_drop_postprocessing(self) -> None: 

3344 """Test that projections and deduplications on query objects can 

3345 drop post-query region filtering to ensure the query remains in 

3346 the SQL engine. 

3347 """ 

3348 registry = self.makeRegistry() 

3349 self.loadData(registry, "base.yaml") 

3350 self.loadData(registry, "spatial.yaml") 

3351 

3352 def pop_transfer(tree: Relation) -> Relation: 

3353 """If a relation tree terminates with a transfer to a new engine, 

3354 return the relation prior to that transfer. If not, return the 

3355 original relation. 

3356 """ 

3357 match tree: 

3358 case Transfer(target=target): 

3359 return target 

3360 case _: 

3361 return tree 

3362 

3363 # There's no public way to get a Query object yet, so we get one from a 

3364 # DataCoordinateQueryResults private attribute. When a public API is 

3365 # available this test should use it. 

3366 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query 

3367 # We expect this query to terminate in the iteration engine originally, 

3368 # because region-filtering is necessary. 

3369 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine) 

3370 # If we deduplicate, we usually have to do that downstream of the 

3371 # filtering. That means the deduplication has to happen in the 

3372 # iteration engine. 

3373 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine) 

3374 # If we pass drop_postprocessing, we instead drop the region filtering 

3375 # so the deduplication can happen in SQL (though there might still be 

3376 # transfer to iteration at the tail of the tree that we can ignore; 

3377 # that's what the pop_transfer takes care of here). 

3378 self.assertIsInstance( 

3379 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine, 

3380 sql.Engine, 

3381 )