Coverage for python/lsst/daf/butler/registry/tests/_registry.py: 5%

1273 statements  

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    Timespan,
    ddl,
)
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    InconsistentDataIdError,
    MissingCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum
from ..summaries import CollectionSummary

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a `RegistryConfig` used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need entirely default configuration should just
        instantiate `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

115 

116 @abstractmethod 

117 def makeRegistry(self, share_repo_with: Optional[Registry] = None) -> Optional[Registry]: 

118 """Return the Registry instance to be tested. 

119 

120 Parameters 

121 ---------- 

122 share_repo_with : `Registry`, optional 

123 If provided, the new registry should point to the same data 

124 repository as this existing registry. 

125 

126 Returns 

127 ------- 

128 registry : `Registry` 

129 New `Registry` instance, or `None` *only* if `share_repo_with` is 

130 not `None` and this test case does not support that argument 

131 (e.g. it is impossible with in-memory SQLite DBs). 

132 """ 

133 raise NotImplementedError() 

134 
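
    # A minimal sketch of a concrete subclass, assuming an in-memory SQLite
    # registry created with `Registry.createFromConfig`; the class name and
    # data-directory path here are illustrative only:
    #
    #     class SqliteMemoryRegistryTests(RegistryTests, unittest.TestCase):
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data", "registry")
    #
    #         def makeRegistry(self, share_repo_with=None):
    #             if share_repo_with is not None:
    #                 return None  # an in-memory SQLite DB cannot be shared
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"
    #             return Registry.createFromConfig(config)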

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
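
    # Typical use from a query test; `expected_refs` stands in for a
    # precomputed list of `DatasetRef` objects (hypothetical name):
    #
    #     results = registry.queryDatasets("bias", collections=run)
    #     self.checkQueryResults(results, expected_refs)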

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters.  SQLite documents the limit as 32k, but in
        # practice it appears to be much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, the second has matching elements in different batches
        # (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))
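
    # The long-IN-clause behavior exercised above relies on the registry
    # splitting value lists into batches; conceptually something like this
    # sketch (not the actual implementation):
    #
    #     def batched(values, size=1000):
    #         values = sorted(set(values))
    #         for i in range(0, len(values), size):
    #             yield values[i : i + size]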

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.obs.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))
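
    # In the test above, `DatasetType.nameWithComponent("flat", "image")`
    # composes the parent-and-component name form used throughout this
    # module, i.e. "flat.image".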

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset IDs."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # The UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All the different failure modes
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test the reproducible (non-unique) ID generation modes; such
        # datasets can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):

                # Use an integer dataset ID to force UUID calculation in
                # _importDatasets
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                (ref1,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to a different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
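
    # The version-5 UUIDs checked above are name-based: conceptually the
    # deterministic modes hash the dataset type, data ID, and (for
    # DATAID_TYPE_RUN) the run name into a UUID.  A rough sketch, assuming a
    # made-up namespace; the real namespace and string encoding live in the
    # datasets manager:
    #
    #     NS = uuid.UUID("00000000-0000-0000-0000-000000000000")
    #
    #     def sketch_dataset_id(datasetType, dataId, run=None):
    #         parts = [datasetType.name, repr(sorted(dataId.items()))]
    #         if run is not None:  # DATAID_TYPE_RUN mode
    #             parts.append(run)
    #         return uuid.uuid5(NS, "/".join(parts))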

    def testImportDatasetsInt(self):
        """Test for `Registry._importDatasets` with integer dataset IDs."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManager"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}
        dataset_id = 999999999

        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run=run)
        (ref1,) = registry._importDatasets([ref])
        # Should make a new integer ID.
        self.assertNotEqual(ref1.id, ref.id)

        # Ingesting the same dataId with a different dataset ID is an error
        ref2 = ref1.unresolved().resolved(dataset_id, run=run)
        with self.assertRaises(ConflictingDefinitionError):
            registry._importDatasets([ref2])

        # Ingesting a different dataId with the same dataset ID should work
        ref3 = DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

        ref3 = DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names,
        )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
        )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
        )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")},
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregisterStorageClass call above isn't simulating the real-life
        # case we want it to simulate, in which different versions of
        # daf_butler in entirely different Python processes interact with
        # the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(
            registry.queryDatasets(
                "bias.wcs",
                collections=collection,
            )
        )
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2}, {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but
        # that should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1
        # and ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)
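
    # Chained collections are searched in the order their children are
    # listed, recursing into nested chains, which is why chain2 =
    # [run2, chain1] above falls back to tag1.  A simplified sketch of that
    # search order (the real lookup is done in SQL, not by recursion like
    # this):
    #
    #     def sketch_find(registry, datasetType, dataId, collection):
    #         if registry.getCollectionType(collection) is CollectionType.CHAINED:
    #             for child in registry.getCollectionChain(collection):
    #                 ref = sketch_find(registry, datasetType, dataId, child)
    #                 if ref is not None:
    #                     return ref
    #             return None
    #         return registry.findDataset(datasetType, dataId, collections=[collection])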

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten'
        option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled
        back if an exception propagates out of an inner transaction block
        and is then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins
        to skymap.
        """
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the different collections;
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dimension string works as well as a list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions),
            )
            self.assertEqual(
                packer2.unpack(packer2.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer2.dimensions),
            )
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with both input collections
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to a single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # a more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # Calling queryDataIds with only one of `datasets` and `collections`
        # is an error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, which is not in the dimensions but is
        # a part of the full expression, so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8")))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to a single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # Specifying a non-existing skymap is an exception
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()

1180 def testSpatialJoin(self): 

1181 """Test queries that involve spatial overlap joins.""" 

1182 registry = self.makeRegistry() 

1183 self.loadData(registry, "hsc-rc2-subset.yaml") 

1184 

1185 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of 

1186 # the TopologicalFamily they belong to. We'll relate all elements in 

1187 # each family to all of the elements in each other family. 

1188 families = defaultdict(set) 

1189 # Dictionary of {element.name: {dataId: region}}. 

1190 regions = {} 

1191 for element in registry.dimensions.getDatabaseElements(): 

1192 if element.spatial is not None: 

1193 families[element.spatial.name].add(element) 

1194 regions[element.name] = { 

1195 record.dataId: record.region for record in registry.queryDimensionRecords(element) 

1196 } 

1197 

1198 # If this check fails, it's not necessarily a problem - it may just be 

1199 # a reasonable change to the default dimension definitions - but the 

1200 # test below depends on there being more than one family to do anything 

1201 # useful. 

1202 self.assertEqual(len(families), 2) 

1203 

1204 # Overlap DatabaseDimensionElements with each other. 

1205 for family1, family2 in itertools.combinations(families, 2): 

1206 for element1, element2 in itertools.product(families[family1], families[family2]): 

1207 graph = DimensionGraph.union(element1.graph, element2.graph) 

1208 # Construct expected set of overlapping data IDs via a 

1209 # brute-force comparison of the regions we've already fetched. 

1210 expected = { 

1211 DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph) 

1212 for (dataId1, region1), (dataId2, region2) in itertools.product( 

1213 regions[element1.name].items(), regions[element2.name].items() 

1214 ) 

1215 if not region1.isDisjointFrom(region2) 

1216 } 

1217 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1218 queried = set(registry.queryDataIds(graph)) 

1219 self.assertEqual(expected, queried) 

1220 

1221 # Overlap each DatabaseDimensionElement with the commonSkyPix system. 

1222 commonSkyPix = registry.dimensions.commonSkyPix 

1223 for elementName, regions in regions.items(): 

1224 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph) 

1225 expected = set() 

1226 for dataId, region in regions.items(): 

1227 for begin, end in commonSkyPix.pixelization.envelope(region): 

1228 expected.update( 

1229 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph) 

1230 for index in range(begin, end) 

1231 ) 

1232 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1233 queried = set(registry.queryDataIds(graph)) 

1234 self.assertEqual(expected, queried) 
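# A short sketch of the range expansion used above (assuming a sphgeom
# pixelization `pix` and region `region` as in this test): envelope()
# yields half-open (begin, end) index ranges, which expand to pixel
# indices like this:
#
#     indices = [i for begin, end in pix.envelope(region)
#                for i in range(begin, end)]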

1235 

1236 def testAbstractQuery(self): 

1237 """Test that we can run a query that just lists the known 

1238 bands. This is tricky because band is 

1239 backed by a query against physical_filter. 

1240 """ 

1241 registry = self.makeRegistry() 

1242 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1243 registry.insertDimensionData( 

1244 "physical_filter", 

1245 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1246 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1247 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1248 ) 

1249 rows = registry.queryDataIds(["band"]).toSet() 

1250 self.assertCountEqual( 

1251 rows, 

1252 [ 

1253 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1254 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1255 ], 

1256 ) 
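# A sketch of the relationship this test relies on: each physical_filter
# record carries its band, so the distinct bands above could also be
# derived manually (a non-authoritative equivalent, same registry):
#
#     bands = {rec.band for rec in
#              registry.queryDimensionRecords("physical_filter")}
#     assert bands == {"i", "r"}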

1257 

1258 def testAttributeManager(self): 

1259 """Test basic functionality of attribute manager.""" 

1260 # number of attributes with schema versions in a fresh database, 

1261 # 6 managers with 3 records per manager, plus config for dimensions 

1262 VERSION_COUNT = 6 * 3 + 1 

1263 

1264 registry = self.makeRegistry() 

1265 attributes = registry._managers.attributes 

1266 

1267 # check what get() returns for a non-existing key 

1268 self.assertIsNone(attributes.get("attr")) 

1269 self.assertEqual(attributes.get("attr", ""), "") 

1270 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1271 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1272 

1273 # cannot store empty key or value 

1274 with self.assertRaises(ValueError): 

1275 attributes.set("", "value") 

1276 with self.assertRaises(ValueError): 

1277 attributes.set("attr", "") 

1278 

1279 # set value of non-existing key 

1280 attributes.set("attr", "value") 

1281 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1282 self.assertEqual(attributes.get("attr"), "value") 

1283 

1284 # update value of existing key 

1285 with self.assertRaises(ButlerAttributeExistsError): 

1286 attributes.set("attr", "value2") 

1287 

1288 attributes.set("attr", "value2", force=True) 

1289 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1290 self.assertEqual(attributes.get("attr"), "value2") 

1291 

1292 # delete existing key 

1293 self.assertTrue(attributes.delete("attr")) 

1294 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1295 

1296 # delete non-existing key 

1297 self.assertFalse(attributes.delete("non-attr")) 

1298 

1299 # store a bunch of keys and get the list back 

1300 data = [ 

1301 ("version.core", "1.2.3"), 

1302 ("version.dimensions", "3.2.1"), 

1303 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1304 ] 

1305 for key, value in data: 

1306 attributes.set(key, value) 

1307 items = dict(attributes.items()) 

1308 for key, value in data: 

1309 self.assertEqual(items[key], value) 
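# A compact usage sketch of the attribute-manager API exercised above:
#
#     attributes.set("key", "value")               # insert; raises if key exists
#     attributes.set("key", "value2", force=True)  # overwrite existing key
#     attributes.get("key", "default")             # read with a default
#     attributes.delete("key")                     # True if the key was deleted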

1310 

1311 def testQueryDatasetsDeduplication(self): 

1312 """Test that the findFirst option to queryDatasets selects datasets 

1313 from collections in the order given. 

1314 """ 

1315 registry = self.makeRegistry() 

1316 self.loadData(registry, "base.yaml") 

1317 self.loadData(registry, "datasets.yaml") 

1318 self.assertCountEqual( 

1319 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1320 [ 

1321 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1322 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1323 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1324 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1325 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1326 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1327 ], 

1328 ) 

1329 self.assertCountEqual( 

1330 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1331 [ 

1332 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1333 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1334 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1335 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1336 ], 

1337 ) 

1338 self.assertCountEqual( 

1339 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1340 [ 

1341 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1342 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1343 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1344 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1345 ], 

1346 ) 
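# A sketch of the findFirst contract verified above: for each data ID,
# the dataset comes from the first collection in the search order that
# contains one, so reversing the collection list flips the winner for
# detectors 2 and 3 (present in both runs):
#
#     refs = registry.queryDatasets(
#         "bias", collections=["imported_g", "imported_r"], findFirst=True
#     )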

1347 

1348 def testQueryResults(self): 

1349 """Test querying for data IDs and then manipulating the QueryResults 

1350 object returned to perform other queries. 

1351 """ 

1352 registry = self.makeRegistry() 

1353 self.loadData(registry, "base.yaml") 

1354 self.loadData(registry, "datasets.yaml") 

1355 bias = registry.getDatasetType("bias") 

1356 flat = registry.getDatasetType("flat") 

1357 # Obtain expected results from methods other than those we're testing 

1358 # here. That includes: 

1359 # - the dimensions of the data IDs we want to query: 

1360 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1361 # - the dimensions of some other data IDs we'll extract from that: 

1362 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1363 # - the data IDs we expect to obtain from the first queries: 

1364 expectedDataIds = DataCoordinateSet( 

1365 { 

1366 DataCoordinate.standardize( 

1367 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1368 ) 

1369 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1370 }, 

1371 graph=expectedGraph, 

1372 hasFull=False, 

1373 hasRecords=False, 

1374 ) 

1375 # - the flat datasets we expect to find from those data IDs, in just 

1376 # one collection (so deduplication is irrelevant): 

1377 expectedFlats = [ 

1378 registry.findDataset( 

1379 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1380 ), 

1381 registry.findDataset( 

1382 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1383 ), 

1384 registry.findDataset( 

1385 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1386 ), 

1387 ] 

1388 # - the data IDs we expect to extract from that: 

1389 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1390 # - the bias datasets we expect to find from those data IDs, after we 

1391 # subset-out the physical_filter dimension, both with duplicates: 

1392 expectedAllBiases = [ 

1393 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1394 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1395 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1396 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1397 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1398 ] 

1399 # - ...and without duplicates: 

1400 expectedDeduplicatedBiases = [ 

1401 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1402 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1403 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1404 ] 

1405 # Test against those expected results, using a "lazy" query for the 

1406 # data IDs (which re-executes that query each time we use it to do 

1407 # something new). 

1408 dataIds = registry.queryDataIds( 

1409 ["detector", "physical_filter"], 

1410 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1411 instrument="Cam1", 

1412 ) 

1413 self.assertEqual(dataIds.graph, expectedGraph) 

1414 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1415 self.assertCountEqual( 

1416 list( 

1417 dataIds.findDatasets( 

1418 flat, 

1419 collections=["imported_r"], 

1420 ) 

1421 ), 

1422 expectedFlats, 

1423 ) 

1424 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1425 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1426 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1427 self.assertCountEqual( 

1428 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1429 expectedAllBiases, 

1430 ) 

1431 self.assertCountEqual( 

1432 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1433 expectedDeduplicatedBiases, 

1434 ) 

1435 # Materialize the bias dataset queries (only) by putting the results 

1436 # into temporary tables, then repeat those tests. 

1437 with subsetDataIds.findDatasets( 

1438 bias, collections=["imported_r", "imported_g"], findFirst=False 

1439 ).materialize() as biases: 

1440 self.assertCountEqual(list(biases), expectedAllBiases) 

1441 with subsetDataIds.findDatasets( 

1442 bias, collections=["imported_r", "imported_g"], findFirst=True 

1443 ).materialize() as biases: 

1444 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1445 # Materialize the data ID subset query, but not the dataset queries. 

1446 with subsetDataIds.materialize() as subsetDataIds: 

1447 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1448 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1449 self.assertCountEqual( 

1450 list( 

1451 subsetDataIds.findDatasets( 

1452 bias, collections=["imported_r", "imported_g"], findFirst=False 

1453 ) 

1454 ), 

1455 expectedAllBiases, 

1456 ) 

1457 self.assertCountEqual( 

1458 list( 

1459 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1460 ), 

1461 expectedDeduplicatedBiases, 

1462 ) 

1463 # Materialize the dataset queries, too. 

1464 with subsetDataIds.findDatasets( 

1465 bias, collections=["imported_r", "imported_g"], findFirst=False 

1466 ).materialize() as biases: 

1467 self.assertCountEqual(list(biases), expectedAllBiases) 

1468 with subsetDataIds.findDatasets( 

1469 bias, collections=["imported_r", "imported_g"], findFirst=True 

1470 ).materialize() as biases: 

1471 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1472 # Materialize the original query, but none of the follow-up queries. 

1473 with dataIds.materialize() as dataIds: 

1474 self.assertEqual(dataIds.graph, expectedGraph) 

1475 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1476 self.assertCountEqual( 

1477 list( 

1478 dataIds.findDatasets( 

1479 flat, 

1480 collections=["imported_r"], 

1481 ) 

1482 ), 

1483 expectedFlats, 

1484 ) 

1485 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1486 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1487 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1488 self.assertCountEqual( 

1489 list( 

1490 subsetDataIds.findDatasets( 

1491 bias, collections=["imported_r", "imported_g"], findFirst=False 

1492 ) 

1493 ), 

1494 expectedAllBiases, 

1495 ) 

1496 self.assertCountEqual( 

1497 list( 

1498 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1499 ), 

1500 expectedDeduplicatedBiases, 

1501 ) 

1502 # Materialize just the bias dataset queries. 

1503 with subsetDataIds.findDatasets( 

1504 bias, collections=["imported_r", "imported_g"], findFirst=False 

1505 ).materialize() as biases: 

1506 self.assertCountEqual(list(biases), expectedAllBiases) 

1507 with subsetDataIds.findDatasets( 

1508 bias, collections=["imported_r", "imported_g"], findFirst=True 

1509 ).materialize() as biases: 

1510 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1511 # Materialize the subset data ID query, but not the dataset 

1512 # queries. 

1513 with subsetDataIds.materialize() as subsetDataIds: 

1514 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1515 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1516 self.assertCountEqual( 

1517 list( 

1518 subsetDataIds.findDatasets( 

1519 bias, collections=["imported_r", "imported_g"], findFirst=False 

1520 ) 

1521 ), 

1522 expectedAllBiases, 

1523 ) 

1524 self.assertCountEqual( 

1525 list( 

1526 subsetDataIds.findDatasets( 

1527 bias, collections=["imported_r", "imported_g"], findFirst=True 

1528 ) 

1529 ), 

1530 expectedDeduplicatedBiases, 

1531 ) 

1532 # Materialize the bias dataset queries, too, so now we're 

1533 # materializing every single step. 

1534 with subsetDataIds.findDatasets( 

1535 bias, collections=["imported_r", "imported_g"], findFirst=False 

1536 ).materialize() as biases: 

1537 self.assertCountEqual(list(biases), expectedAllBiases) 

1538 with subsetDataIds.findDatasets( 

1539 bias, collections=["imported_r", "imported_g"], findFirst=True 

1540 ).materialize() as biases: 

1541 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 
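# A sketch of the materialize pattern exercised above: results are
# written to a temporary table that follow-up queries join against,
# instead of re-executing the original query each time:
#
#     with registry.queryDataIds(["detector"]).materialize() as ids:
#         refs = list(ids.findDatasets("bias", collections=["imported_g"]))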

1542 

1543 def testEmptyDimensionsQueries(self): 

1544 """Test Query and QueryResults objects in the case where there are no 

1545 dimensions. 

1546 """ 

1547 # Set up test data: one dataset type, two runs, one dataset in each. 

1548 registry = self.makeRegistry() 

1549 self.loadData(registry, "base.yaml") 

1550 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1551 registry.registerDatasetType(schema) 

1552 dataId = DataCoordinate.makeEmpty(registry.dimensions) 

1553 run1 = "run1" 

1554 run2 = "run2" 

1555 registry.registerRun(run1) 

1556 registry.registerRun(run2) 

1557 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1558 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1559 # Query directly for both of the datasets together, then each one at a time. 

1560 self.checkQueryResults( 

1561 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1562 ) 

1563 self.checkQueryResults( 

1564 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1565 [dataset1], 

1566 ) 

1567 self.checkQueryResults( 

1568 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1569 [dataset2], 

1570 ) 

1571 # Query for data IDs with no dimensions. 

1572 dataIds = registry.queryDataIds([]) 

1573 self.checkQueryResults(dataIds, [dataId]) 

1574 # Use queried data IDs to find the datasets. 

1575 self.checkQueryResults( 

1576 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1577 [dataset1, dataset2], 

1578 ) 

1579 self.checkQueryResults( 

1580 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1581 [dataset1], 

1582 ) 

1583 self.checkQueryResults( 

1584 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1585 [dataset2], 

1586 ) 

1587 # Now materialize the data ID query results and repeat those tests. 

1588 with dataIds.materialize() as dataIds: 

1589 self.checkQueryResults(dataIds, [dataId]) 

1590 self.checkQueryResults( 

1591 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1592 [dataset1], 

1593 ) 

1594 self.checkQueryResults( 

1595 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1596 [dataset2], 

1597 ) 

1598 # Query for non-empty data IDs, then subset that to get the empty one. 

1599 # Repeat the above tests starting from that. 

1600 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1601 self.checkQueryResults(dataIds, [dataId]) 

1602 self.checkQueryResults( 

1603 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1604 [dataset1, dataset2], 

1605 ) 

1606 self.checkQueryResults( 

1607 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1608 [dataset1], 

1609 ) 

1610 self.checkQueryResults( 

1611 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1612 [dataset2], 

1613 ) 

1614 with dataIds.materialize() as dataIds: 

1615 self.checkQueryResults(dataIds, [dataId]) 

1616 self.checkQueryResults( 

1617 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1618 [dataset1, dataset2], 

1619 ) 

1620 self.checkQueryResults( 

1621 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1622 [dataset1], 

1623 ) 

1624 self.checkQueryResults( 

1625 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1626 [dataset2], 

1627 ) 

1628 # Query for non-empty data IDs, then materialize, then subset to get 

1629 # the empty one. Repeat again. 

1630 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1631 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1632 self.checkQueryResults(dataIds, [dataId]) 

1633 self.checkQueryResults( 

1634 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1635 [dataset1, dataset2], 

1636 ) 

1637 self.checkQueryResults( 

1638 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1639 [dataset1], 

1640 ) 

1641 self.checkQueryResults( 

1642 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1643 [dataset2], 

1644 ) 

1645 with dataIds.materialize() as dataIds: 

1646 self.checkQueryResults(dataIds, [dataId]) 

1647 self.checkQueryResults( 

1648 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1649 [dataset1, dataset2], 

1650 ) 

1651 self.checkQueryResults( 

1652 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1653 [dataset1], 

1654 ) 

1655 self.checkQueryResults( 

1656 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1657 [dataset2], 

1658 ) 

1659 

1660 def testDimensionDataModifications(self): 

1661 """Test that modifying dimension records via: 

1662 syncDimensionData(..., update=True) and 

1663 insertDimensionData(..., replace=True) works as expected, even in the 

1664 presence of datasets using those dimensions and spatial overlap 

1665 relationships. 

1666 """ 

1667 

1668 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1669 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1670 for begin, end in ranges: 

1671 yield from range(begin, end) 
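# A sketch of the helper above (values follow from the HTM subdivision
# used below, where scaling an index range by 4 yields the four children
# of each trixel):
#
#     list(unpack_range_set(lsst.sphgeom.RangeSet(12288).scaled(4)))
#     # -> [49152, 49153, 49154, 49155]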

1672 

1673 def range_set_hull( 

1674 ranges: lsst.sphgeom.RangeSet, 

1675 pixelization: lsst.sphgeom.HtmPixelization, 

1676 ) -> lsst.sphgeom.ConvexPolygon: 

1677 """Create a ConvexPolygon hull of the region defined by a set of 

1678 HTM pixelization index ranges. 

1679 """ 

1680 points = [] 

1681 for index in unpack_range_set(ranges): 

1682 points.extend(pixelization.triangle(index).getVertices()) 

1683 return lsst.sphgeom.ConvexPolygon(points) 

1684 

1685 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1686 # and four child regions (the trixels within the parent at the next 

1687 # level). We'll use the parent as a tract/visit region and the children 

1688 # as its patch/visit_detector regions. 

1689 registry = self.makeRegistry() 

1690 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1691 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1692 index = 12288 

1693 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 

1694 assert htm6.universe().contains(child_ranges_small) 

1695 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)] 

1696 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1697 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1698 ) 

1699 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1700 # Make a larger version of each child region, defined to be the set of 

1701 # htm6 trixels that overlap the original's bounding circle. Make a new 

1702 # parent that's the convex hull of the new children. 

1703 child_regions_large = [ 

1704 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1705 ] 

1706 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small)) 

1707 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1708 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1709 ) 

1710 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1711 assert parent_region_large.contains(parent_region_small) 

1712 assert not parent_region_small.contains(parent_region_large) 

1713 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1714 # Find some commonSkyPix indices that overlap the large regions but do 

1715 # not overlap the small regions. We use commonSkyPix here to make sure the 

1716 # real tests later involve what's in the database, not just post-query 

1717 # region filtering. 

1718 child_difference_indices = [] 

1719 for large, small in zip(child_regions_large, child_regions_small): 

1720 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1721 assert difference, "if this is empty, we can't test anything useful with these regions" 

1722 assert all( 

1723 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1724 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1725 for d in difference 

1726 ) 

1727 child_difference_indices.append(difference) 

1728 parent_difference_indices = list( 

1729 unpack_range_set( 

1730 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1731 ) 

1732 ) 

1733 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1734 assert all( 

1735 ( 

1736 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1737 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1738 ) 

1739 for d in parent_difference_indices 

1740 ) 

1741 # Now that we've finally got those regions, we'll insert the large ones 

1742 # as tract/patch dimension records. 

1743 skymap_name = "testing_v1" 

1744 registry.insertDimensionData( 

1745 "skymap", 

1746 { 

1747 "name": skymap_name, 

1748 "hash": bytes([42]), 

1749 "tract_max": 1, 

1750 "patch_nx_max": 2, 

1751 "patch_ny_max": 2, 

1752 }, 

1753 ) 

1754 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

1755 registry.insertDimensionData( 

1756 "patch", 

1757 *[ 

1758 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1759 for n, c in enumerate(child_regions_large) 

1760 ], 

1761 ) 

1762 # Add a dataset that uses these dimensions to make sure that modifying 

1763 # them doesn't disrupt foreign keys (need to make sure DB doesn't 

1764 # implement insert with replace=True as delete-then-insert). 

1765 dataset_type = DatasetType( 

1766 "coadd", 

1767 dimensions=["tract", "patch"], 

1768 universe=registry.dimensions, 

1769 storageClass="Exposure", 

1770 ) 

1771 registry.registerDatasetType(dataset_type) 

1772 registry.registerCollection("the_run", CollectionType.RUN) 

1773 registry.insertDatasets( 

1774 dataset_type, 

1775 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

1776 run="the_run", 

1777 ) 

1778 # Query for tracts and patches that overlap some "difference" htm9 

1779 # pixels; there should be overlaps, because the database has 

1780 # the "large" suite of regions. 

1781 self.assertEqual( 

1782 {0}, 

1783 { 

1784 data_id["tract"] 

1785 for data_id in registry.queryDataIds( 

1786 ["tract"], 

1787 skymap=skymap_name, 

1788 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1789 ) 

1790 }, 

1791 ) 

1792 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1793 self.assertIn( 

1794 patch_id, 

1795 { 

1796 data_id["patch"] 

1797 for data_id in registry.queryDataIds( 

1798 ["patch"], 

1799 skymap=skymap_name, 

1800 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1801 ) 

1802 }, 

1803 ) 

1804 # Use sync to update the tract region and insert to update the patch 

1805 # regions, to the "small" suite. 

1806 updated = registry.syncDimensionData( 

1807 "tract", 

1808 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

1809 update=True, 

1810 ) 

1811 self.assertEqual(updated, {"region": parent_region_large}) 

1812 registry.insertDimensionData( 

1813 "patch", 

1814 *[ 

1815 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1816 for n, c in enumerate(child_regions_small) 

1817 ], 

1818 replace=True, 

1819 ) 

1820 # Query again; there now should be no such overlaps, because the 

1821 # database has the "small" suite of regions. 

1822 self.assertFalse( 

1823 set( 

1824 registry.queryDataIds( 

1825 ["tract"], 

1826 skymap=skymap_name, 

1827 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1828 ) 

1829 ) 

1830 ) 

1831 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1832 self.assertNotIn( 

1833 patch_id, 

1834 { 

1835 data_id["patch"] 

1836 for data_id in registry.queryDataIds( 

1837 ["patch"], 

1838 skymap=skymap_name, 

1839 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1840 ) 

1841 }, 

1842 ) 

1843 # Update back to the large regions and query one more time. 

1844 updated = registry.syncDimensionData( 

1845 "tract", 

1846 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

1847 update=True, 

1848 ) 

1849 self.assertEqual(updated, {"region": parent_region_small}) 

1850 registry.insertDimensionData( 

1851 "patch", 

1852 *[ 

1853 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1854 for n, c in enumerate(child_regions_large) 

1855 ], 

1856 replace=True, 

1857 ) 

1858 self.assertEqual( 

1859 {0}, 

1860 { 

1861 data_id["tract"] 

1862 for data_id in registry.queryDataIds( 

1863 ["tract"], 

1864 skymap=skymap_name, 

1865 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1866 ) 

1867 }, 

1868 ) 

1869 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1870 self.assertIn( 

1871 patch_id, 

1872 { 

1873 data_id["patch"] 

1874 for data_id in registry.queryDataIds( 

1875 ["patch"], 

1876 skymap=skymap_name, 

1877 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1878 ) 

1879 }, 

1880 ) 
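# A sketch of the sync-with-update contract verified above (region names
# hypothetical): when an existing record changes, syncDimensionData
# returns the prior values of the fields it replaced:
#
#     updated = registry.syncDimensionData(
#         "tract", {"skymap": skymap_name, "id": 0, "region": new_region},
#         update=True,
#     )
#     # updated == {"region": old_region}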

1881 

1882 def testCalibrationCollections(self): 

1883 """Test operations on `~CollectionType.CALIBRATION` collections, 

1884 including `Registry.certify`, `Registry.decertify`, and 

1885 `Registry.findDataset`. 

1886 """ 

1887 # Setup - make a Registry, fill it with some datasets in 

1888 # non-calibration collections. 

1889 registry = self.makeRegistry() 

1890 self.loadData(registry, "base.yaml") 

1891 self.loadData(registry, "datasets.yaml") 

1892 # Set up some timestamps. 

1893 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

1894 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

1895 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

1896 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

1897 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

1898 allTimespans = [ 

1899 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

1900 ] 

1901 # Get references to some datasets. 

1902 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

1903 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

1904 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

1905 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

1906 # Register the main calibration collection we'll be working with. 

1907 collection = "Cam1/calibs/default" 

1908 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

1909 # Cannot associate into a calibration collection (no timespan). 

1910 with self.assertRaises(CollectionTypeError): 

1911 registry.associate(collection, [bias2a]) 

1912 # Certify 2a dataset with [t2, t4) validity. 

1913 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

1914 # We should not be able to certify 2b with anything overlapping that 

1915 # window. 

1916 with self.assertRaises(ConflictingDefinitionError): 

1917 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

1918 with self.assertRaises(ConflictingDefinitionError): 

1919 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

1920 with self.assertRaises(ConflictingDefinitionError): 

1921 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

1922 with self.assertRaises(ConflictingDefinitionError): 

1923 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

1924 with self.assertRaises(ConflictingDefinitionError): 

1925 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

1926 with self.assertRaises(ConflictingDefinitionError): 

1927 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

1928 with self.assertRaises(ConflictingDefinitionError): 

1929 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

1930 with self.assertRaises(ConflictingDefinitionError): 

1931 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

1932 # We should be able to certify 3a with a range overlapping that window, 

1933 # because it's for a different detector. 

1934 # We'll certify 3a over [t1, t3). 

1935 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

1936 # Now we'll certify 2b and 3b together over [t4, ∞). 

1937 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

1938 

1939 # Fetch all associations and check that they are what we expect. 

1940 self.assertCountEqual( 

1941 list( 

1942 registry.queryDatasetAssociations( 

1943 "bias", 

1944 collections=[collection, "imported_g", "imported_r"], 

1945 ) 

1946 ), 

1947 [ 

1948 DatasetAssociation( 

1949 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1950 collection="imported_g", 

1951 timespan=None, 

1952 ), 

1953 DatasetAssociation( 

1954 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1955 collection="imported_r", 

1956 timespan=None, 

1957 ), 

1958 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

1959 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

1960 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

1961 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

1962 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

1963 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

1964 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

1965 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

1966 ], 

1967 ) 

1968 

1969 class Ambiguous: 

1970 """Tag class to denote lookups that should be ambiguous.""" 

1971 

1972 pass 

1973 

1974 def assertLookup( 

1975 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]] 

1976 ) -> None: 

1977 """Local function that asserts that a bias lookup returns the given 

1978 expected result. 

1979 """ 

1980 if expected is Ambiguous: 

1981 with self.assertRaises(RuntimeError): 

1982 registry.findDataset( 

1983 "bias", 

1984 collections=collection, 

1985 instrument="Cam1", 

1986 detector=detector, 

1987 timespan=timespan, 

1988 ) 

1989 else: 

1990 self.assertEqual( 

1991 expected, 

1992 registry.findDataset( 

1993 "bias", 

1994 collections=collection, 

1995 instrument="Cam1", 

1996 detector=detector, 

1997 timespan=timespan, 

1998 ), 

1999 ) 

2000 

2001 # Systematically test lookups against expected results. 

2002 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2003 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2004 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2005 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2006 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

2007 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2008 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2009 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2010 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2011 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

2012 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2013 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2014 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2015 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

2016 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2017 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

2018 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

2019 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

2020 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

2021 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2022 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2023 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2024 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2025 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2026 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2027 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

2028 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2029 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2030 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2031 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2032 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

2033 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2034 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2035 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2036 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2037 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2038 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2039 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2040 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2041 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2042 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2043 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2044 

2045 # Decertify [t3, t5) for all data IDs, and do test lookups again. 

2046 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2047 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2048 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2049 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2050 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2051 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2052 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2053 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2054 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2055 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2056 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2057 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2058 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2059 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2060 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2061 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2062 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2063 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2064 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2065 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2066 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2067 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2068 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2069 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2070 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2071 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2072 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2073 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2074 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2075 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2076 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2077 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2078 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2079 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2080 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2081 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2082 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2083 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2084 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2085 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2086 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2087 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2088 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2089 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2090 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2091 

2092 # Decertify everything, this time with explicit data IDs, then check 

2093 # that no lookups succeed. 

2094 registry.decertify( 

2095 collection, 

2096 "bias", 

2097 Timespan(None, None), 

2098 dataIds=[ 

2099 dict(instrument="Cam1", detector=2), 

2100 dict(instrument="Cam1", detector=3), 

2101 ], 

2102 ) 

2103 for detector in (2, 3): 

2104 for timespan in allTimespans: 

2105 assertLookup(detector=detector, timespan=timespan, expected=None) 

2106 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2107 # those. 

2108 registry.certify( 

2109 collection, 

2110 [bias2a, bias3a], 

2111 Timespan(None, None), 

2112 ) 

2113 for timespan in allTimespans: 

2114 assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2115 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2116 # Decertify just bias2a over [t2, t4). 

2117 # This should split a single certification row into two (and leave the 

2118 # other existing row, for bias3a, alone). 

2119 registry.decertify( 

2120 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2121 ) 

2122 for timespan in allTimespans: 

2123 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2124 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2125 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2126 if overlapsBefore and overlapsAfter: 

2127 expected = Ambiguous 

2128 elif overlapsBefore or overlapsAfter: 

2129 expected = bias2a 

2130 else: 

2131 expected = None 

2132 assertLookup(detector=2, timespan=timespan, expected=expected) 
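# A usage sketch of the timespan-based lookup exercised throughout this
# test (names as above; an ambiguous lookup raises RuntimeError):
#
#     ref = registry.findDataset(
#         "bias", collections=collection,
#         instrument="Cam1", detector=2, timespan=Timespan(t4, t5),
#     )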

2133 

2134 def testSkipCalibs(self): 

2135 """Test how queries handle skipping of calibration collections.""" 

2136 registry = self.makeRegistry() 

2137 self.loadData(registry, "base.yaml") 

2138 self.loadData(registry, "datasets.yaml") 

2139 

2140 coll_calib = "Cam1/calibs/default" 

2141 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2142 

2143 # Add all biases to the calibration collection. 

2144 # Without this, the logic that prunes dataset subqueries based on 

2145 # datasetType-collection summary information will fire before the logic 

2146 # we want to test below. This is a good thing (it avoids the dreaded 

2147 # NotImplementedError a bit more often) everywhere but here. 

2148 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2149 

2150 coll_list = [coll_calib, "imported_g", "imported_r"] 

2151 chain = "Cam1/chain" 

2152 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2153 registry.setCollectionChain(chain, coll_list) 

2154 

2155 # an explicit collection list will raise if findFirst=True or there are temporal 

2156 # dimensions 

2157 with self.assertRaises(NotImplementedError): 

2158 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2159 with self.assertRaises(NotImplementedError): 

2160 registry.queryDataIds( 

2161 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2162 ).count() 

2163 

2164 # chain will skip 

2165 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2166 self.assertGreater(len(datasets), 0) 

2167 

2168 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2169 self.assertGreater(len(dataIds), 0) 

2170 

2171 # glob will skip too 

2172 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2173 self.assertGreater(len(datasets), 0) 

2174 

2175 # regular expression will skip too 

2176 pattern = re.compile(".*") 

2177 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2178 self.assertGreater(len(datasets), 0) 

2179 

2180 # ellipsis should work as usual 

2181 datasets = list(registry.queryDatasets("bias", collections=...)) 

2182 self.assertGreater(len(datasets), 0) 

2183 

2184 # a few tests with findFirst 

2185 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2186 self.assertGreater(len(datasets), 0) 
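# A sketch of the setup that makes the skipping observable (names as
# above): a CHAINED collection whose members include a CALIBRATION
# collection can still be searched with findFirst, because the
# calibration member is skipped rather than raising:
#
#     refs = list(registry.queryDatasets("bias", collections=chain,
#                                        findFirst=True))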

2187 

2188 def testIngestTimeQuery(self): 

2189 """Test queries against the dataset ingest_date field.""" 

2190 registry = self.makeRegistry() 

2191 self.loadData(registry, "base.yaml") 

2192 dt0 = datetime.utcnow() 

2193 self.loadData(registry, "datasets.yaml") 

2194 dt1 = datetime.utcnow() 

2195 

2196 datasets = list(registry.queryDatasets(..., collections=...)) 

2197 len0 = len(datasets) 

2198 self.assertGreater(len0, 0) 

2199 

2200 where = "ingest_date > T'2000-01-01'" 

2201 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2202 len1 = len(datasets) 

2203 self.assertEqual(len0, len1) 

2204 

2205 # no one will ever use this piece of software in 30 years 

2206 where = "ingest_date > T'2050-01-01'" 

2207 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2208 len2 = len(datasets) 

2209 self.assertEqual(len2, 0) 

2210 

2211 # Check more exact timing to make sure there is no 37 seconds offset 

2212 # (after fixing DM-30124). SQLite time precision is 1 second, so make 

2213 # sure that we don't test with higher precision. 

2214 tests = [ 

2215 # format: (timestamp, operator, expected_len) 

2216 (dt0 - timedelta(seconds=1), ">", len0), 

2217 (dt0 - timedelta(seconds=1), "<", 0), 

2218 (dt1 + timedelta(seconds=1), "<", len0), 

2219 (dt1 + timedelta(seconds=1), ">", 0), 

2220 ] 

2221 for dt, op, expect_len in tests: 

2222 dt_str = dt.isoformat(sep=" ") 

2223 

2224 where = f"ingest_date {op} T'{dt_str}'" 

2225 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2226 self.assertEqual(len(datasets), expect_len) 

2227 

2228 # same with bind using datetime or astropy Time 

2229 where = f"ingest_date {op} ingest_time" 

2230 datasets = list( 

2231 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2232 ) 

2233 self.assertEqual(len(datasets), expect_len) 

2234 

2235 dt_astropy = astropy.time.Time(dt, format="datetime") 

2236 datasets = list( 

2237 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2238 ) 

2239 self.assertEqual(len(datasets), expect_len) 

2240 

2241 def testTimespanQueries(self): 

2242 """Test query expressions involving timespans.""" 

2243 registry = self.makeRegistry() 

2244 self.loadData(registry, "hsc-rc2-subset.yaml") 

2245 # All visits in the database; mapping from ID to timespan. 

2246 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2247 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2248 # visit IDs are monotonically increasing). 

2249 ids = sorted(visits.keys()) 

2250 self.assertGreater(len(ids), 20) 

2251 # Pick some quasi-random indexes into `ids` to play with. 

2252 i1 = int(len(ids) * 0.1) 

2253 i2 = int(len(ids) * 0.3) 

2254 i3 = int(len(ids) * 0.6) 

2255 i4 = int(len(ids) * 0.8) 

2256 # Extract some times from those: just before the beginning of i1 (which 

2257 # should be after the end of the exposure before), exactly the 

2258 # beginning of i2, just after the beginning of i3 (and before its end), 

2259 # and the exact end of i4. 

2260 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2261 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2262 t2 = visits[ids[i2]].begin 

2263 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2264 self.assertLess(t3, visits[ids[i3]].end) 

2265 t4 = visits[ids[i4]].end 

2266 # Make sure those are actually in order. 

2267 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2268 

2269 bind = { 

2270 "t1": t1, 

2271 "t2": t2, 

2272 "t3": t3, 

2273 "t4": t4, 

2274 "ts23": Timespan(t2, t3), 

2275 } 

2276 

2277 def query(where): 

2278 """Helper function that queries for visit data IDs and returns 

2279 results as a sorted, deduplicated list of visit IDs. 

2280 """ 

2281 return sorted( 

2282 { 

2283 dataId["visit"] 

2284 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2285 } 

2286 ) 

2287 

2288 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2289 # where they appear in the expression, and how we get the timespan into 

2290 # the expression. 

2291 

2292 # t1 is before the start of i1, so this should not include i1. 

2293 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2294 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2295 # should not include i2. 

2296 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2297 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2298 # t3 is in the middle of i3, so this should include i3. 

2299 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2300 # This one should not include i3 by the same reasoning. 

2301 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2302 # t4 is exactly at the end of i4, so this should include i4. 

2303 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2304 # i4's upper bound of t4 is exclusive, so this should not include i4. 

2305 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2306 

2307 # Now some timespan vs. time scalar queries. 

2308 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2309 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2310 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2311 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2312 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2313 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2314 

2315 # Empty timespans should not overlap anything. 

2316 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 

2317 

2318 def testCollectionSummaries(self): 

2319 """Test recording and retrieval of collection summaries.""" 

2320 self.maxDiff = None 

2321 registry = self.makeRegistry() 

2322 # Importing datasets from yaml should go through the code path where 

2323 # we update collection summaries as we insert datasets. 

2324 self.loadData(registry, "base.yaml") 

2325 self.loadData(registry, "datasets.yaml") 

2326 flat = registry.getDatasetType("flat") 

2327 expected1 = CollectionSummary.makeEmpty(registry.dimensions) 

2328 expected1.datasetTypes.add(registry.getDatasetType("bias")) 

2329 expected1.datasetTypes.add(flat) 

2330 expected1.dimensions.update_extract( 

2331 DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions) 

2332 ) 

2333 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2334 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2335 # Create a chained collection with both of the imported runs; the 

2336 # summary should be the same, because it's a union with itself. 

2337 chain = "chain" 

2338 registry.registerCollection(chain, CollectionType.CHAINED) 

2339 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2340 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2341 # Associate flats only into a tagged collection and a calibration 

2342 # collection to check summaries of those. 

2343 tag = "tag" 

2344 registry.registerCollection(tag, CollectionType.TAGGED) 

2345 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2346 calibs = "calibs" 

2347 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2348 registry.certify( 

2349 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2350 ) 

2351 expected2 = expected1.copy() 

2352 expected2.datasetTypes.discard("bias") 

2353 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2354 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2355 # Explicitly calling Registry.refresh() should load those same 

2356 # summaries, via a totally different code path. 

2357 registry.refresh() 

2358 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2359 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2360 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2361 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 
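# A sketch of how such a summary can be consumed (names as registered
# above): skip collections that cannot contain a given dataset type:
#
#     summary = registry.getCollectionSummary(calibs)
#     if "bias" not in summary.datasetTypes.names:
#         pass  # no point searching this collection for biases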

2362 

2363 def testBindInQueryDatasets(self): 

2364 """Test that the bind parameter is correctly forwarded in 

2365 queryDatasets recursion. 

2366 """ 

2367 registry = self.makeRegistry() 

2368 # Load the shared test data so there are datasets in both imported 

2369 # collections to query against. 

2370 self.loadData(registry, "base.yaml") 

2371 self.loadData(registry, "datasets.yaml") 

2372 self.assertEqual( 

2373 set(registry.queryDatasets("flat", band="r", collections=...)), 

2374 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2375 ) 
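# A sketch of the alternative that bind avoids: embedding the value in
# the expression text itself, with manual quoting:
#
#     registry.queryDatasets("flat", where="band = 'r'", collections=...)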

2376 

2377 def testQueryResultSummaries(self): 

2378 """Test summary methods like `count`, `any`, and `explain_no_results` 

2379 on `DataCoordinateQueryResults` and `DatasetQueryResults`. 

2380 """ 

2381 registry = self.makeRegistry() 

2382 self.loadData(registry, "base.yaml") 

2383 self.loadData(registry, "datasets.yaml") 

2384 self.loadData(registry, "spatial.yaml") 

2385 # Default test dataset has two collections, each with both flats and 

2386 # biases. Add a new collection with only biases. 

2387 registry.registerCollection("biases", CollectionType.TAGGED) 

2388 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2389 # First query yields two results, and involves no postprocessing. 

2390 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2391 self.assertTrue(query1.any(execute=False, exact=False)) 

2392 self.assertTrue(query1.any(execute=True, exact=False)) 

2393 self.assertTrue(query1.any(execute=True, exact=True)) 

2394 self.assertEqual(query1.count(exact=False), 2) 

2395 self.assertEqual(query1.count(exact=True), 2) 

2396 self.assertFalse(list(query1.explain_no_results())) 

2397 # Second query should yield no results, but this isn't detectable 

2398 # unless we actually run a query. 

2399 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2400 self.assertTrue(query2.any(execute=False, exact=False)) 

2401 self.assertFalse(query2.any(execute=True, exact=False)) 

2402 self.assertFalse(query2.any(execute=True, exact=True)) 

2403 self.assertEqual(query2.count(exact=False), 0) 

2404 self.assertEqual(query2.count(exact=True), 0) 

2405 self.assertFalse(list(query2.explain_no_results())) 

2406 # These queries yield no results due to various problems that can be 

2407 # spotted prior to execution, producing helpful diagnostics.

2408 base_query = registry.queryDataIds(["detector", "physical_filter"]) 

2409 for query, snippets in [ 

2410 ( 

2411 # Dataset type name doesn't match any existing dataset types. 

2412 registry.queryDatasets("nonexistent", collections=...), 

2413 ["nonexistent"], 

2414 ), 

2415 ( 

2416 # Dataset type name doesn't match any existing dataset types. 

2417 base_query.findDatasets("nonexistent", collections=["biases"]), 

2418 ["nonexistent"], 

2419 ), 

2420 ( 

2421 # Dataset type name doesn't match any existing dataset types. 

2422 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...), 

2423 ["nonexistent"], 

2424 ), 

2425 ( 

2426 # Dataset type object isn't registered. 

2427 registry.queryDatasets( 

2428 DatasetType( 

2429 "nonexistent", 

2430 dimensions=["instrument"], 

2431 universe=registry.dimensions, 

2432 storageClass="Image", 

2433 ), 

2434 collections=..., 

2435 ), 

2436 ["nonexistent"], 

2437 ), 

2438 ( 

2439 # Dataset type object isn't registered. 

2440 base_query.findDatasets( 

2441 DatasetType( 

2442 "nonexistent", 

2443 dimensions=["instrument"], 

2444 universe=registry.dimensions, 

2445 storageClass="Image", 

2446 ), 

2447 collections=["biases"], 

2448 ), 

2449 ["nonexistent"], 

2450 ), 

2451 ( 

2452 # No datasets of this type in this collection. 

2453 registry.queryDatasets("flat", collections=["biases"]), 

2454 ["flat", "biases"], 

2455 ), 

2456 ( 

2457 # No datasets of this type in this collection. 

2458 base_query.findDatasets("flat", collections=["biases"]), 

2459 ["flat", "biases"], 

2460 ), 

2461 ( 

2462 # No collections matching at all. 

2463 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2464 ["potato"], 

2465 ), 

2466 ( 

2467 # Dataset type name doesn't match any existing dataset types. 

2468 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...), 

2469 ["nonexistent"], 

2470 ), 

2471 ]: 

2472 

2473 self.assertFalse(query.any(execute=False, exact=False)) 

2474 self.assertFalse(query.any(execute=True, exact=False)) 

2475 self.assertFalse(query.any(execute=True, exact=True)) 

2476 self.assertEqual(query.count(exact=False), 0) 

2477 self.assertEqual(query.count(exact=True), 0) 

2478 messages = list(query.explain_no_results()) 

2479 self.assertTrue(messages) 

2480 # Want all expected snippets to appear in at least one message. 

2481 self.assertTrue( 

2482 any( 

2483 all(snippet in message for snippet in snippets) for message in messages

2484 ), 

2485 messages, 

2486 ) 

2487 

2488 # These queries yield no results due to problems that can be identified 

2489 # by cheap follow-up queries, producing helpful diagnostics.

2490 for query, snippets in [ 

2491 ( 

2492 # No records for one of the involved dimensions. 

2493 registry.queryDataIds(["subfilter"]), 

2494 ["dimension records", "subfilter"], 

2495 ), 

2496 ( 

2497 # No records for one of the involved dimensions. 

2498 registry.queryDimensionRecords("subfilter"), 

2499 ["dimension records", "subfilter"], 

2500 ), 

2501 ]: 

2502 self.assertFalse(query.any(execute=True, exact=False)) 

2503 self.assertFalse(query.any(execute=True, exact=True)) 

2504 self.assertEqual(query.count(exact=True), 0) 

2505 messages = list(query.explain_no_results()) 

2506 self.assertTrue(messages) 

2507 # Want all expected snippets to appear in at least one message. 

2508 self.assertTrue( 

2509 any( 

2510 all(snippet in message for snippet in snippets) for message in messages

2511 ), 

2512 messages, 

2513 ) 

2514 

2515 # This query yields four overlaps in the database, but one is filtered 

2516 # out in postprocessing. The count queries aren't accurate because 

2517 # they don't account for duplication that happens due to an internal 

2518 # join against commonSkyPix. 

2519 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2520 self.assertEqual( 

2521 { 

2522 DataCoordinate.standardize( 

2523 instrument="Cam1", 

2524 skymap="SkyMap1", 

2525 visit=v, 

2526 tract=t, 

2527 universe=registry.dimensions, 

2528 ) 

2529 for v, t in [(1, 0), (2, 0), (2, 1)] 

2530 }, 

2531 set(query3), 

2532 ) 

2533 self.assertTrue(query3.any(execute=False, exact=False)) 

2534 self.assertTrue(query3.any(execute=True, exact=False)) 

2535 self.assertTrue(query3.any(execute=True, exact=True)) 

2536 self.assertGreaterEqual(query3.count(exact=False), 4) 

2537 self.assertGreaterEqual(query3.count(exact=True), 3) 

2538 self.assertFalse(list(query3.explain_no_results())) 

2539 # This query yields overlaps in the database, but all are filtered 

2540 # out in postprocessing. The count queries again aren't very useful. 

2541 # We have to use `where=` here to avoid an optimization that 

2542 # (currently) skips the spatial postprocess-filtering because it 

2543 # recognizes that no spatial join is necessary. That's not ideal, but 

2544 # fixing it is out of scope for this ticket. 

2545 query4 = registry.queryDataIds( 

2546 ["visit", "tract"], 

2547 instrument="Cam1", 

2548 skymap="SkyMap1", 

2549 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2550 ) 

2551 self.assertFalse(set(query4)) 

2552 self.assertTrue(query4.any(execute=False, exact=False)) 

2553 self.assertTrue(query4.any(execute=True, exact=False)) 

2554 self.assertFalse(query4.any(execute=True, exact=True)) 

2555 self.assertGreaterEqual(query4.count(exact=False), 1) 

2556 self.assertEqual(query4.count(exact=True), 0) 

2557 messages = list(query4.explain_no_results()) 

2558 self.assertTrue(messages) 

2559 self.assertTrue(any("regions did not overlap" in message for message in messages)) 

2560 

2561 # And there are cases where queries produce empty results that we do

2562 # not yet know how to explain.

2563 query5 = registry.queryDimensionRecords( 

2564 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1" 

2565 ) 

2566 self.assertEqual(query5.count(exact=True), 0) 

2567 messages = list(query5.explain_no_results()) 

2568 self.assertFalse(messages) 

2569 
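# --- Illustrative sketch (editor's addition): the typical diagnostic
# pattern for the summary methods tested above.  ``my_registry`` is an
# assumed, already-populated `Registry`.
def demo_no_results_diagnostics(my_registry):
    results = my_registry.queryDataIds(["physical_filter"], band="h")
    # any(execute=False, exact=False) may report a cheap false positive;
    # execute=True, exact=True actually runs (and postprocesses) the query.
    if not results.any(execute=True, exact=True):
        # Each message describes one detectable reason for emptiness
        # (unknown dataset type, empty collection, missing dimension
        # records, non-overlapping regions, ...); the list may be empty
        # when no explanation is available.
        for message in results.explain_no_results():
            print(message)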

2570 def testQueryDataIdsOrderBy(self): 

2571 """Test order_by and limit on result returned by queryDataIds().""" 

2572 registry = self.makeRegistry() 

2573 self.loadData(registry, "base.yaml") 

2574 self.loadData(registry, "datasets.yaml") 

2575 self.loadData(registry, "spatial.yaml") 

2576 

2577 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

2578 return registry.queryDataIds( 

2579 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

2580 ) 

2581 

2582 Test = namedtuple( 

2583 "testQueryDataIdsOrderByTest", 

2584 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

2585 defaults=(None, None, None), 

2586 ) 

2587 

2588 test_data = ( 

2589 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2590 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

2591 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

2592 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

2593 Test( 

2594 "tract.id,visit.id", 

2595 "tract,visit", 

2596 ((0, 1), (0, 1), (0, 2)), 

2597 limit=(3,), 

2598 ), 

2599 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

2600 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

2601 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

2602 Test( 

2603 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

2604 ), 

2605 Test( 

2606 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

2607 ), 

2608 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2609 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2610 Test( 

2611 "tract,-timespan.begin,timespan.end", 

2612 "tract,visit", 

2613 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

2614 ), 

2615 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

2616 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

2617 Test( 

2618 "tract,detector", 

2619 "tract,detector", 

2620 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2621 datasets="flat", 

2622 collections="imported_r", 

2623 ), 

2624 Test( 

2625 "tract,detector.full_name", 

2626 "tract,detector", 

2627 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2628 datasets="flat", 

2629 collections="imported_r", 

2630 ), 

2631 Test( 

2632 "tract,detector.raft,detector.name_in_raft", 

2633 "tract,detector", 

2634 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2635 datasets="flat", 

2636 collections="imported_r", 

2637 ), 

2638 ) 

2639 

2640 for test in test_data: 

2641 order_by = test.order_by.split(",") 

2642 keys = test.keys.split(",") 

2643 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

2644 if test.limit is not None: 

2645 query = query.limit(*test.limit) 

2646 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

2647 self.assertEqual(dataIds, test.result) 

2648 

2649 # and materialize 

2650 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)

2651 if test.limit is not None: 

2652 query = query.limit(*test.limit) 

2653 with query.materialize() as materialized: 

2654 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in materialized) 

2655 self.assertEqual(dataIds, test.result) 

2656 

2657 # errors in a name 

2658 for order_by in ("", "-"): 

2659 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2660 list(do_query().order_by(order_by)) 

2661 

2662 for order_by in ("undimension.name", "-undimension.name"): 

2663 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"): 

2664 list(do_query().order_by(order_by)) 

2665 

2666 for order_by in ("attract", "-attract"): 

2667 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

2668 list(do_query().order_by(order_by)) 

2669 

2670 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

2671 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

2672 

2673 with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimesion"): 

2674 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

2675 

2676 with self.assertRaisesRegex( 

2677 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

2678 ): 

2679 list(do_query(("tract")).order_by("timespan.begin")) 

2680 

2681 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

2682 list(do_query(("tract")).order_by("tract.timespan.begin")) 

2683 

2684 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

2685 list(do_query(("tract")).order_by("tract.name")) 

2686 
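# --- Illustrative sketch (editor's addition): the order_by/limit
# chaining exercised above.  ``my_registry`` is an assumed,
# already-populated `Registry`; a leading "-" reverses a sort key and
# "element.field" addresses a specific dimension's metadata.
def demo_order_by(my_registry):
    query = my_registry.queryDataIds(
        ["visit", "tract"], instrument="Cam1", skymap="SkyMap1"
    )
    for data_id in query.order_by("tract", "-visit.exposure_time").limit(3):
        print(data_id["tract"], data_id["visit"])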

2687 def testQueryDataIdsGovernorExceptions(self): 

2688 """Test exceptions raised by queryDataIds() for incorrect governors.""" 

2689 registry = self.makeRegistry() 

2690 self.loadData(registry, "base.yaml") 

2691 self.loadData(registry, "datasets.yaml") 

2692 self.loadData(registry, "spatial.yaml") 

2693 

2694 def do_query(dimensions, dataId=None, where=None, bind=None, **kwargs): 

2695 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs) 

2696 

2697 Test = namedtuple( 

2698 "testQueryDataIdExceptionsTest", 

2699 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"), 

2700 defaults=(None, None, None, {}, None, 0), 

2701 ) 

2702 

2703 test_data = ( 

2704 Test("tract,visit", count=6), 

2705 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2706 Test( 

2707 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError 

2708 ), 

2709 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2710 Test( 

2711 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError 

2712 ), 

2713 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6), 

2714 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError), 

2715 Test( 

2716 "tract,visit", 

2717 where="instrument=cam AND skymap=map", 

2718 bind={"cam": "Cam1", "map": "SkyMap1"}, 

2719 count=6, 

2720 ), 

2721 Test( 

2722 "tract,visit", 

2723 where="instrument=cam AND skymap=map", 

2724 bind={"cam": "Cam", "map": "SkyMap"}, 

2725 exception=DataIdValueError, 

2726 ), 

2727 ) 

2728 

2729 for test in test_data: 

2730 dimensions = test.dimensions.split(",") 

2731 if test.exception: 

2732 with self.assertRaises(test.exception): 

2733 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() 

2734 else: 

2735 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2736 self.assertEqual(query.count(), test.count) 

2737 

2738 # and materialize 

2739 if test.exception: 

2740 with self.assertRaises(test.exception): 

2741 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2742 with query.materialize() as materialized: 

2743 materialized.count() 

2744 else: 

2745 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2746 with query.materialize() as materialized: 

2747 self.assertEqual(materialized.count(), test.count) 

2748 
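# --- Illustrative sketch (editor's addition): how the governor-value
# validation tested above surfaces to callers; DataIdValueError is the
# same exception name used in the tests.  ``my_registry`` is an assumed,
# already-populated `Registry`.
def demo_governor_validation(my_registry):
    try:
        # "Cam2" is not a registered instrument in the test data.
        my_registry.queryDataIds(
            ["tract", "visit"], instrument="Cam2", skymap="SkyMap1"
        ).count()
    except DataIdValueError as err:
        print("bad governor value:", err)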

2749 def testQueryDimensionRecordsOrderBy(self): 

2750 """Test order_by and limit on result returned by 

2751 queryDimensionRecords(). 

2752 """ 

2753 registry = self.makeRegistry() 

2754 self.loadData(registry, "base.yaml") 

2755 self.loadData(registry, "datasets.yaml") 

2756 self.loadData(registry, "spatial.yaml") 

2757 

2758 def do_query(element, datasets=None, collections=None): 

2759 return registry.queryDimensionRecords( 

2760 element, instrument="Cam1", datasets=datasets, collections=collections 

2761 ) 

2762 

2763 query = do_query("detector") 

2764 self.assertEqual(len(list(query)), 4) 

2765 

2766 Test = namedtuple( 

2767 "testQueryDataIdsOrderByTest", 

2768 ("element", "order_by", "result", "limit", "datasets", "collections"), 

2769 defaults=(None, None, None), 

2770 ) 

2771 

2772 test_data = ( 

2773 Test("detector", "detector", (1, 2, 3, 4)), 

2774 Test("detector", "-detector", (4, 3, 2, 1)), 

2775 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)), 

2776 Test("detector", "-detector.purpose", (4,), limit=(1,)), 

2777 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)), 

2778 Test("visit", "visit", (1, 2)), 

2779 Test("visit", "-visit.id", (2, 1)), 

2780 Test("visit", "zenith_angle", (1, 2)), 

2781 Test("visit", "-visit.name", (2, 1)), 

2782 Test("visit", "day_obs,-timespan.begin", (2, 1)), 

2783 ) 

2784 

2785 for test in test_data: 

2786 order_by = test.order_by.split(",") 

2787 query = do_query(test.element).order_by(*order_by) 

2788 if test.limit is not None: 

2789 query = query.limit(*test.limit) 

2790 dataIds = tuple(rec.id for rec in query) 

2791 self.assertEqual(dataIds, test.result) 

2792 

2793 # errors in a name 

2794 for order_by in ("", "-"): 

2795 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2796 list(do_query("detector").order_by(order_by)) 

2797 

2798 for order_by in ("undimension.name", "-undimension.name"): 

2799 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"): 

2800 list(do_query("detector").order_by(order_by)) 

2801 

2802 for order_by in ("attract", "-attract"): 

2803 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."): 

2804 list(do_query("detector").order_by(order_by)) 

2805 
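# --- Illustrative sketch (editor's addition): the same ordering
# interface applies to dimension records, with fields given bare
# ("raft") or qualified ("detector.purpose").  ``my_registry`` is an
# assumed, already-populated `Registry`.
def demo_record_order(my_registry):
    records = my_registry.queryDimensionRecords(
        "detector", instrument="Cam1"
    ).order_by("raft", "-name_in_raft").limit(2)
    return [record.id for record in records]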

2806 def testQueryDimensionRecordsExceptions(self): 

2807 """Test exceptions raised by queryDimensionRecords().""" 

2808 registry = self.makeRegistry() 

2809 self.loadData(registry, "base.yaml") 

2810 self.loadData(registry, "datasets.yaml") 

2811 self.loadData(registry, "spatial.yaml") 

2812 

2813 result = registry.queryDimensionRecords("detector") 

2814 self.assertEqual(result.count(), 4) 

2815 result = registry.queryDimensionRecords("detector", instrument="Cam1") 

2816 self.assertEqual(result.count(), 4) 

2817 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"}) 

2818 self.assertEqual(result.count(), 4) 

2819 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'") 

2820 self.assertEqual(result.count(), 4) 

2821 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"}) 

2822 self.assertEqual(result.count(), 4) 

2823 

2824 with self.assertRaisesRegex( 

2825 DataIdValueError, "Could not fetch record for required dimension instrument" 

2826 ): 

2827 registry.queryDimensionRecords("detector", instrument="NotCam1") 

2828 

2829 with self.assertRaisesRegex( 

2830 DataIdValueError, "Could not fetch record for required dimension instrument" 

2831 ): 

2832 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) 

2833 

2834 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

2835 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") 

2836 result.count() 

2837 

2838 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

2839 result = registry.queryDimensionRecords( 

2840 "detector", where="instrument=instr", bind={"instr": "NotCam1"} 

2841 ) 

2842 result.count() 

2843 
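# --- Illustrative sketch (editor's addition): the two validation
# timings the assertions above distinguish.  ``my_registry`` is an
# assumed, already-populated `Registry`.
def demo_validation_timing(my_registry):
    # A bad governor value given as a keyword (or data ID) is rejected
    # immediately, when the query is constructed:
    try:
        my_registry.queryDimensionRecords("detector", instrument="NotCam1")
    except DataIdValueError:
        pass
    # The same value inside a `where` string is only rejected once the
    # query is actually executed, e.g. by count():
    deferred = my_registry.queryDimensionRecords(
        "detector", where="instrument='NotCam1'"
    )
    try:
        deferred.count()
    except DataIdValueError:
        pass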

2844 def testDatasetConstrainedDimensionRecordQueries(self): 

2845 """Test that queryDimensionRecords works even when given a dataset 

2846 constraint whose dimensions extend beyond the requested dimension 

2847 element's. 

2848 """ 

2849 registry = self.makeRegistry() 

2850 self.loadData(registry, "base.yaml") 

2851 self.loadData(registry, "datasets.yaml") 

2852 # Query for physical_filter dimension records, using a dataset type

2853 # whose dimensions include detector as well as physical_filter.

2854 records = registry.queryDimensionRecords( 

2855 "physical_filter", 

2856 datasets=["flat"], 

2857 collections="imported_r", 

2858 ) 

2859 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
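# --- Illustrative sketch (editor's addition): the dataset-constrained
# record query verified above.  ``my_registry`` is an assumed,
# already-populated `Registry`.
def demo_dataset_constrained_records(my_registry):
    # "flat" carries more dimensions than physical_filter alone, yet can
    # still constrain a physical_filter-only record query.
    records = my_registry.queryDimensionRecords(
        "physical_filter", datasets=["flat"], collections="imported_r"
    )
    return {record.name for record in records}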