
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    Timespan,
    ddl,
)
from .._collection_summary import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    InconsistentDataIdError,
    MissingCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum

if TYPE_CHECKING:
    from .._registry import Registry



class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class; if a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class; if a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """


    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        Returned instance will be pre-configured based on the values of class
        members, and default-configured for all other parameters. Subclasses
        that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self, share_repo_with: Optional[Registry] = None) -> Optional[Registry]:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with` is
            not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
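            # datastore=None loads registry content only; these tests never
            # need file artifacts, so no datastore is populated.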

            backend.load(datastore=None)


    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())


    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
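        # Opaque tables hold rows that the registry stores verbatim on behalf
        # of other components (e.g. datastore-internal records); they have no
        # dimension relationships and are queried by exact-match constraints.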

        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters. SQLite documents the limit as 32k, but in
        # practice it appears to be much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, and the second has matching elements in different
        # batches (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))


    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)


        # Registering a second, distinct dataset type should also work.

        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})


    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )


    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)


    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )


    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))


    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))


    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))


    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # The provided UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All the different failure modes
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])


        # Test the deterministic ID-generation modes; such datasets can be
        # re-imported multiple times.
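        # (DATAID_TYPE derives the UUID from the dataset type and data ID
        # alone; DATAID_TYPE_RUN also folds in the run name, which is why only
        # the latter can be imported into a second run below.)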

        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):

                # Use integer dataset ID to force UUID calculation in _import
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                (ref1,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)


    def testImportDatasetsInt(self):
        """Test for `Registry._importDatasets` with integer dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManager"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}
        dataset_id = 999999999

        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run=run)
        (ref1,) = registry._importDatasets([ref])
        # Should make new integer ID.
        self.assertNotEqual(ref1.id, ref.id)

        # Ingesting same dataId with different dataset ID is an error
        ref2 = ref1.unresolved().resolved(dataset_id, run=run)
        with self.assertRaises(ConflictingDefinitionError):
            registry._importDatasets([ref2])

        # Ingesting different dataId with the same dataset ID should work
        ref3 = DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

        ref3 = DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)


    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
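        # (With the default components=None, a component dataset type is
        # returned only when the pattern matches the component but not its
        # parent dataset type, as the cases below exercise.)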

557 self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names) 

558 self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names) 

559 self.assertLess( 

560 {"bias", "flat", "bias.wcs", "flat.photoCalib"}, 

561 NamedValueSet(registry.queryDatasetTypes(components=True)).names, 

562 ) 

563 # Use a pattern that can match either parent or components. Again, 

564 # components are only returned if components=True. 

565 self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names) 

566 self.assertEqual( 

567 {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names 

568 ) 

569 self.assertLess( 

570 {"bias", "bias.wcs"}, 

571 NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names, 

572 ) 

573 # This pattern matches only a component. In this case we also return 

574 # that component dataset type if components=None. 

575 self.assertEqual( 

576 {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names 

577 ) 

578 self.assertEqual( 

579 set(), 

580 NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names, 

581 ) 

582 self.assertEqual( 

583 {"bias.wcs"}, 

584 NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names, 

585 ) 

586 # Add a dataset type using a StorageClass that we'll then remove; check 

587 # that this does not affect our ability to query for dataset types 

588 # (though it will warn). 

589 tempStorageClass = StorageClass( 

590 name="TempStorageClass", 

591 components={ 

592 "data1": registry.storageClasses.getStorageClass("StructuredDataDict"), 

593 "data2": registry.storageClasses.getStorageClass("StructuredDataDict"), 

594 }, 

595 ) 

596 registry.storageClasses.registerStorageClass(tempStorageClass) 

597 datasetType = DatasetType( 

598 "temporary", 

599 dimensions=["instrument"], 

600 storageClass=tempStorageClass, 

601 universe=registry.dimensions, 

602 ) 

603 registry.registerDatasetType(datasetType) 

604 registry.storageClasses._unregisterStorageClass(tempStorageClass.name) 

605 datasetType._storageClass = None 

606 del tempStorageClass 

607 # Querying for all dataset types, including components, should include 

608 # at least all non-component dataset types (and I don't want to 

609 # enumerate all of the Exposure components for bias and flat here). 

610 with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm: 

611 everything = NamedValueSet(registry.queryDatasetTypes(components=True)) 

612 self.assertIn("TempStorageClass", cm.output[0]) 

613 self.assertLess({"bias", "flat", "temporary"}, everything.names) 

        # It should not include "temporary.data1", because we tried to remove
        # the storage class that would tell it about that. So if the next line
        # fails (i.e. "temporary.data1" _is_ in everything.names), it means
        # this part of the test isn't doing anything, because the _unregister
        # call above isn't simulating the real-life case we want it to
        # simulate, in which different versions of daf_butler in entirely
        # different Python processes interact with the same repo.
        self.assertNotIn("temporary.data1", everything.names)

        # Query for dataset types that start with "temp". This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])


    def testComponentLookups(self):
        """Test searching for component datasets via their parents."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(
            registry.queryDatasets(
                "bias.wcs",
                collections=collection,
            )
        )
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2}, {DatasetRef(childType, dataId) for dataId in dataIds}
        )


    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)

        # Disassociate one and verify that we can't find it there anymore...

        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
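        # (The children of a CHAINED collection are searched in order, so
        # tag1 takes precedence over run2 for any dataset present in both.)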

751 chain1 = "chain1" 

752 registry.registerCollection(chain1, type=CollectionType.CHAINED) 

753 self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED) 

754 # Chained collection exists, but has no collections in it. 

755 self.assertFalse(registry.getCollectionChain(chain1)) 

756 # If we query for all collections, we should get the chained collection 

757 # only if we don't ask to flatten it (i.e. yield only its children). 

758 self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1}) 

759 self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2}) 

760 # Attempt to set its child collections to something circular; that 

761 # should fail. 

762 with self.assertRaises(ValueError): 

763 registry.setCollectionChain(chain1, [tag1, chain1]) 

764 # Add the child collections. 

765 registry.setCollectionChain(chain1, [tag1, run2]) 

766 self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2]) 

767 self.assertEqual(registry.getCollectionParentChains(tag1), {chain1}) 

768 self.assertEqual(registry.getCollectionParentChains(run2), {chain1}) 

769 # Refresh the other registry that points to the same repo, and make 

770 # sure it can see the things we've done (note that this does require 

771 # an explicit refresh(); that's the documented behavior, because 

772 # caching is ~impossible otherwise). 

773 if other_registry is not None: 

774 other_registry.refresh() 

775 self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2]) 

776 self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1}) 

777 self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1}) 

778 # Searching for dataId1 or dataId2 in the chain should return ref1 and 

779 # ref2, because both are in tag1. 

780 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1) 

781 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2) 

782 # Now disassociate ref2 from tag1. The search (for bias) with 

783 # dataId2 in chain1 should then: 

784 # 1. not find it in tag1 

785 # 2. find a different dataset in run2 

786 registry.disassociate(tag1, [ref2]) 

787 ref2b = registry.findDataset(datasetType, dataId2, collections=chain1) 

788 self.assertNotEqual(ref2b, ref2) 

789 self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2)) 

790 # Define a new chain so we can test recursive chains. 

791 chain2 = "chain2" 

792 registry.registerCollection(chain2, type=CollectionType.CHAINED) 

793 registry.setCollectionChain(chain2, [run2, chain1]) 

794 self.assertEqual(registry.getCollectionParentChains(chain1), {chain2}) 

795 self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2}) 

796 # Query for collections matching a regex. 

797 self.assertCountEqual( 

798 list(registry.queryCollections(re.compile("imported_."), flattenChains=False)), 

799 ["imported_r", "imported_g"], 

800 ) 

801 # Query for collections matching a regex or an explicit str. 

802 self.assertCountEqual( 

803 list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)), 

804 ["imported_r", "imported_g", "chain1"], 

805 ) 

        # Search for bias with dataId1: it should be found via tag1 in chain2,
        # recursing through chain1, because it is not in run2.

        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)

        # Search for a flat that is in run2. chain2 searches run2 first, so
        # the dataset found via chain2 should be the same one found in run2
        # directly.

        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)


    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])

853 registry.setCollectionChain("outer", ["inner"], flatten=True) 

854 self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"]) 

855 

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")


    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True

                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.

                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)


    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins
        to skymap.
        """
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),

931 dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1), 

        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2;
                # 100 has different datasets in the different collections,
                # while 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dim string works as well as a list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
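            # Dimension packers encode a data ID as a single integer; pack()
            # and unpack() must round-trip, and the two packers should give
            # different packed values for the same data ID.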

            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions),
            )
            self.assertEqual(
                packer2.unpack(packer2.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer2.dimensions),
            )
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)


        # Select by physical_filter. It is not in `dimensions`, but it is
        # part of the full dimension expansion, so this should work too.

        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))


    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()


        # We need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))

        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8")))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()

        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters

        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()

        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter

        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # Specifying a non-existing skymap is an exception
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()


1185 def testSpatialJoin(self): 

1186 """Test queries that involve spatial overlap joins.""" 

1187 registry = self.makeRegistry() 

1188 self.loadData(registry, "hsc-rc2-subset.yaml") 

1189 

1190 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of 

1191 # the TopologicalFamily they belong to. We'll relate all elements in 

1192 # each family to all of the elements in each other family. 

1193 families = defaultdict(set) 

1194 # Dictionary of {element.name: {dataId: region}}. 

1195 regions = {} 

1196 for element in registry.dimensions.getDatabaseElements(): 

1197 if element.spatial is not None: 

1198 families[element.spatial.name].add(element) 

1199 regions[element.name] = { 

1200 record.dataId: record.region for record in registry.queryDimensionRecords(element) 

1201 } 

1202 

1203 # If this check fails, it's not necessarily a problem - it may just be 

1204 # a reasonable change to the default dimension definitions - but the 

1205 # test below depends on there being more than one family to do anything 

1206 # useful. 

1207 self.assertEqual(len(families), 2) 
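
# With the default dimension configuration, the two spatial families 

# are the observation one (visit, visit_detector_region) and the 

# skymap one (tract, patch). 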

1208 

1209 # Overlap DatabaseDimensionElements with each other. 

1210 for family1, family2 in itertools.combinations(families, 2): 

1211 for element1, element2 in itertools.product(families[family1], families[family2]): 

1212 graph = DimensionGraph.union(element1.graph, element2.graph) 

1213 # Construct expected set of overlapping data IDs via a 

1214 # brute-force comparison of the regions we've already fetched. 

1215 expected = { 

1216 DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph) 

1217 for (dataId1, region1), (dataId2, region2) in itertools.product( 

1218 regions[element1.name].items(), regions[element2.name].items() 

1219 ) 

1220 if not region1.isDisjointFrom(region2) 

1221 } 

1222 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1223 queried = set(registry.queryDataIds(graph)) 

1224 self.assertEqual(expected, queried) 

1225 

1226 # Overlap each DatabaseDimensionElement with the commonSkyPix system. 

1227 commonSkyPix = registry.dimensions.commonSkyPix 

1228 for elementName, elementRegions in regions.items(): 

1229 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph) 

1230 expected = set() 

1231 for dataId, region in elementRegions.items(): 

1232 for begin, end in commonSkyPix.pixelization.envelope(region): 

1233 expected.update( 

1234 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph) 

1235 for index in range(begin, end) 

1236 ) 

1237 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1238 queried = set(registry.queryDataIds(graph)) 

1239 self.assertEqual(expected, queried) 

1240 

1241 def testAbstractQuery(self): 

1242 """Test that we can run a query that just lists the known 

1243 bands. This is tricky because band is 

1244 backed by a query against physical_filter. 

1245 """ 

1246 registry = self.makeRegistry() 

1247 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1248 registry.insertDimensionData( 

1249 "physical_filter", 

1250 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1251 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1252 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1253 ) 

1254 rows = registry.queryDataIds(["band"]).toSet() 

1255 self.assertCountEqual( 

1256 rows, 

1257 [ 

1258 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1259 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1260 ], 

1261 ) 

1262 

1263 def testAttributeManager(self): 

1264 """Test basic functionality of attribute manager.""" 

1265 # number of attributes with schema versions in a fresh database, 

1266 # 6 managers with 3 records per manager, plus config for dimensions 

1267 VERSION_COUNT = 6 * 3 + 1 

1268 

1269 registry = self.makeRegistry() 

1270 attributes = registry._managers.attributes 

1271 

1272 # check what get() returns for non-existing key 

1273 self.assertIsNone(attributes.get("attr")) 

1274 self.assertEqual(attributes.get("attr", ""), "") 

1275 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1276 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1277 

1278 # cannot store empty key or value 

1279 with self.assertRaises(ValueError): 

1280 attributes.set("", "value") 

1281 with self.assertRaises(ValueError): 

1282 attributes.set("attr", "") 

1283 

1284 # set value of non-existing key 

1285 attributes.set("attr", "value") 

1286 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1287 self.assertEqual(attributes.get("attr"), "value") 

1288 

1289 # update value of existing key 

1290 with self.assertRaises(ButlerAttributeExistsError): 

1291 attributes.set("attr", "value2") 

1292 

1293 attributes.set("attr", "value2", force=True) 

1294 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1295 self.assertEqual(attributes.get("attr"), "value2") 

1296 

1297 # delete existing key 

1298 self.assertTrue(attributes.delete("attr")) 

1299 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1300 

1301 # delete non-existing key 

1302 self.assertFalse(attributes.delete("non-attr")) 

1303 

1304 # store a bunch of keys and get the list back 

1305 data = [ 

1306 ("version.core", "1.2.3"), 

1307 ("version.dimensions", "3.2.1"), 

1308 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1309 ] 

1310 for key, value in data: 

1311 attributes.set(key, value) 

1312 items = dict(attributes.items()) 

1313 for key, value in data: 

1314 self.assertEqual(items[key], value) 

1315 

1316 def testQueryDatasetsDeduplication(self): 

1317 """Test that the findFirst option to queryDatasets selects datasets 

1318 from collections in the order given. 

1319 """ 

1320 registry = self.makeRegistry() 

1321 self.loadData(registry, "base.yaml") 

1322 self.loadData(registry, "datasets.yaml") 
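
# datasets.yaml provides two RUN collections: imported_g holds biases 

# for detectors 1-3 and imported_r holds biases for detectors 2-4, so 

# detectors 2 and 3 appear in both. 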

1323 self.assertCountEqual( 

1324 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1325 [ 

1326 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1327 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1328 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1329 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1330 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1331 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1332 ], 

1333 ) 

1334 self.assertCountEqual( 

1335 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1336 [ 

1337 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1338 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1339 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1340 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1341 ], 

1342 ) 

1343 self.assertCountEqual( 

1344 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1345 [ 

1346 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1347 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1348 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1349 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1350 ], 

1351 ) 

1352 

1353 def testQueryResults(self): 

1354 """Test querying for data IDs and then manipulating the QueryResults 

1355 object returned to perform other queries. 

1356 """ 

1357 registry = self.makeRegistry() 

1358 self.loadData(registry, "base.yaml") 

1359 self.loadData(registry, "datasets.yaml") 

1360 bias = registry.getDatasetType("bias") 

1361 flat = registry.getDatasetType("flat") 

1362 # Obtain expected results from methods other than those we're testing 

1363 # here. That includes: 

1364 # - the dimensions of the data IDs we want to query: 

1365 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1366 # - the dimensions of some other data IDs we'll extract from that: 

1367 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1368 # - the data IDs we expect to obtain from the first queries: 

1369 expectedDataIds = DataCoordinateSet( 

1370 { 

1371 DataCoordinate.standardize( 

1372 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1373 ) 

1374 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1375 }, 

1376 graph=expectedGraph, 

1377 hasFull=False, 

1378 hasRecords=False, 

1379 ) 

1380 # - the flat datasets we expect to find from those data IDs, in just 

1381 # one collection (so deduplication is irrelevant): 

1382 expectedFlats = [ 

1383 registry.findDataset( 

1384 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1385 ), 

1386 registry.findDataset( 

1387 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1388 ), 

1389 registry.findDataset( 

1390 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1391 ), 

1392 ] 

1393 # - the data IDs we expect to extract from that: 

1394 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1395 # - the bias datasets we expect to find from those data IDs, after we 

1396 # subset out the physical_filter dimension, first with duplicates: 

1397 expectedAllBiases = [ 

1398 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1399 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1400 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1401 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1402 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1403 ] 

1404 # - ...and without duplicates: 

1405 expectedDeduplicatedBiases = [ 

1406 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1407 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1408 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1409 ] 

1410 # Test against those expected results, using a "lazy" query for the 

1411 # data IDs (which re-executes that query each time we use it to do 

1412 # something new). 

1413 dataIds = registry.queryDataIds( 

1414 ["detector", "physical_filter"], 

1415 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1416 instrument="Cam1", 

1417 ) 

1418 self.assertEqual(dataIds.graph, expectedGraph) 

1419 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1420 self.assertCountEqual( 

1421 list( 

1422 dataIds.findDatasets( 

1423 flat, 

1424 collections=["imported_r"], 

1425 ) 

1426 ), 

1427 expectedFlats, 

1428 ) 

1429 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1430 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1431 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1432 self.assertCountEqual( 

1433 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1434 expectedAllBiases, 

1435 ) 

1436 self.assertCountEqual( 

1437 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1438 expectedDeduplicatedBiases, 

1439 ) 

1440 

1441 # findDatasets should reject "flat" here: it needs physical_filter, which was subset away. 

1442 with self.assertRaises(ValueError): 

1443 subsetDataIds.findDatasets("flat", collections=["imported_r", "imported_g"], findFirst=True) 

1444 

1445 # Use a component dataset type. 

1446 self.assertCountEqual( 

1447 list( 

1448 subsetDataIds.findDatasets( 

1449 bias.makeComponentDatasetType("image"), 

1450 collections=["imported_r", "imported_g"], 

1451 findFirst=False, 

1452 ) 

1453 ), 

1454 [ref.makeComponentRef("image") for ref in expectedAllBiases], 

1455 ) 

1456 

1457 # Use a named dataset type that does not exist and a dataset type 

1458 # object that does not exist. 

1459 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure") 

1460 unknown_component_type = unknown_type.makeComponentDatasetType("image") 

1461 

1462 # Four combinations of unknown dataset type need to be tested: 

1463 # composite vs. component, and string name vs. DatasetType object. 

1464 test_type: Union[str, DatasetType] 

1465 for test_type, test_type_name in ( 

1466 (unknown_type, unknown_type.name), 

1467 (unknown_type.name, unknown_type.name), 

1468 (unknown_component_type, unknown_type.name), 

1469 (unknown_component_type.name, unknown_component_type.name), 

1470 ): 

1471 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name): 

1472 list( 

1473 subsetDataIds.findDatasets( 

1474 test_type, collections=["imported_r", "imported_g"], findFirst=True 

1475 ) 

1476 ) 

1477 

1478 # Materialize the bias dataset queries (only) by putting the results 

1479 # into temporary tables, then repeat those tests. 

1480 with subsetDataIds.findDatasets( 

1481 bias, collections=["imported_r", "imported_g"], findFirst=False 

1482 ).materialize() as biases: 

1483 self.assertCountEqual(list(biases), expectedAllBiases) 

1484 with subsetDataIds.findDatasets( 

1485 bias, collections=["imported_r", "imported_g"], findFirst=True 

1486 ).materialize() as biases: 

1487 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1488 # Materialize the data ID subset query, but not the dataset queries. 

1489 with subsetDataIds.materialize() as subsetDataIds: 

1490 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1491 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1492 self.assertCountEqual( 

1493 list( 

1494 subsetDataIds.findDatasets( 

1495 bias, collections=["imported_r", "imported_g"], findFirst=False 

1496 ) 

1497 ), 

1498 expectedAllBiases, 

1499 ) 

1500 self.assertCountEqual( 

1501 list( 

1502 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1503 ), 

1504 expectedDeduplicatedBiases, 

1505 ) 

1506 # Materialize the dataset queries, too. 

1507 with subsetDataIds.findDatasets( 

1508 bias, collections=["imported_r", "imported_g"], findFirst=False 

1509 ).materialize() as biases: 

1510 self.assertCountEqual(list(biases), expectedAllBiases) 

1511 with subsetDataIds.findDatasets( 

1512 bias, collections=["imported_r", "imported_g"], findFirst=True 

1513 ).materialize() as biases: 

1514 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1515 # Materialize the original query, but none of the follow-up queries. 

1516 with dataIds.materialize() as dataIds: 

1517 self.assertEqual(dataIds.graph, expectedGraph) 

1518 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1519 self.assertCountEqual( 

1520 list( 

1521 dataIds.findDatasets( 

1522 flat, 

1523 collections=["imported_r"], 

1524 ) 

1525 ), 

1526 expectedFlats, 

1527 ) 

1528 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1529 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1530 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1531 self.assertCountEqual( 

1532 list( 

1533 subsetDataIds.findDatasets( 

1534 bias, collections=["imported_r", "imported_g"], findFirst=False 

1535 ) 

1536 ), 

1537 expectedAllBiases, 

1538 ) 

1539 self.assertCountEqual( 

1540 list( 

1541 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1542 ), 

1543 expectedDeduplicatedBiases, 

1544 ) 

1545 # Materialize just the bias dataset queries. 

1546 with subsetDataIds.findDatasets( 

1547 bias, collections=["imported_r", "imported_g"], findFirst=False 

1548 ).materialize() as biases: 

1549 self.assertCountEqual(list(biases), expectedAllBiases) 

1550 with subsetDataIds.findDatasets( 

1551 bias, collections=["imported_r", "imported_g"], findFirst=True 

1552 ).materialize() as biases: 

1553 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1554 # Materialize the subset data ID query, but not the dataset 

1555 # queries. 

1556 with subsetDataIds.materialize() as subsetDataIds: 

1557 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1558 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1559 self.assertCountEqual( 

1560 list( 

1561 subsetDataIds.findDatasets( 

1562 bias, collections=["imported_r", "imported_g"], findFirst=False 

1563 ) 

1564 ), 

1565 expectedAllBiases, 

1566 ) 

1567 self.assertCountEqual( 

1568 list( 

1569 subsetDataIds.findDatasets( 

1570 bias, collections=["imported_r", "imported_g"], findFirst=True 

1571 ) 

1572 ), 

1573 expectedDeduplicatedBiases, 

1574 ) 

1575 # Materialize the bias dataset queries, too, so now we're 

1576 # materializing every single step. 

1577 with subsetDataIds.findDatasets( 

1578 bias, collections=["imported_r", "imported_g"], findFirst=False 

1579 ).materialize() as biases: 

1580 self.assertCountEqual(list(biases), expectedAllBiases) 

1581 with subsetDataIds.findDatasets( 

1582 bias, collections=["imported_r", "imported_g"], findFirst=True 

1583 ).materialize() as biases: 

1584 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1585 

1586 def testEmptyDimensionsQueries(self): 

1587 """Test Query and QueryResults objects in the case where there are no 

1588 dimensions. 

1589 """ 

1590 # Set up test data: one dataset type, two runs, one dataset in each. 

1591 registry = self.makeRegistry() 

1592 self.loadData(registry, "base.yaml") 

1593 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1594 registry.registerDatasetType(schema) 

1595 dataId = DataCoordinate.makeEmpty(registry.dimensions) 

1596 run1 = "run1" 

1597 run2 = "run2" 

1598 registry.registerRun(run1) 

1599 registry.registerRun(run2) 

1600 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1601 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1602 # Query directly for both datasets, then for each one individually. 

1603 self.checkQueryResults( 

1604 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1605 ) 

1606 self.checkQueryResults( 

1607 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1608 [dataset1], 

1609 ) 

1610 self.checkQueryResults( 

1611 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1612 [dataset2], 

1613 ) 

1614 # Query for data IDs with no dimensions. 

1615 dataIds = registry.queryDataIds([]) 

1616 self.checkQueryResults(dataIds, [dataId]) 

1617 # Use queried data IDs to find the datasets. 

1618 self.checkQueryResults( 

1619 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1620 [dataset1, dataset2], 

1621 ) 

1622 self.checkQueryResults( 

1623 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1624 [dataset1], 

1625 ) 

1626 self.checkQueryResults( 

1627 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1628 [dataset2], 

1629 ) 

1630 # Now materialize the data ID query results and repeat those tests. 

1631 with dataIds.materialize() as dataIds: 

1632 self.checkQueryResults(dataIds, [dataId]) 

1633 self.checkQueryResults( 

1634 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1635 [dataset1], 

1636 ) 

1637 self.checkQueryResults( 

1638 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1639 [dataset2], 

1640 ) 

1641 # Query for non-empty data IDs, then subset that to get the empty one. 

1642 # Repeat the above tests starting from that. 

1643 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1644 self.checkQueryResults(dataIds, [dataId]) 

1645 self.checkQueryResults( 

1646 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1647 [dataset1, dataset2], 

1648 ) 

1649 self.checkQueryResults( 

1650 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1651 [dataset1], 

1652 ) 

1653 self.checkQueryResults( 

1654 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1655 [dataset2], 

1656 ) 

1657 with dataIds.materialize() as dataIds: 

1658 self.checkQueryResults(dataIds, [dataId]) 

1659 self.checkQueryResults( 

1660 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1661 [dataset1, dataset2], 

1662 ) 

1663 self.checkQueryResults( 

1664 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1665 [dataset1], 

1666 ) 

1667 self.checkQueryResults( 

1668 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1669 [dataset2], 

1670 ) 

1671 # Query for non-empty data IDs, then materialize, then subset to get 

1672 # the empty one. Repeat again. 

1673 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1674 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1675 self.checkQueryResults(dataIds, [dataId]) 

1676 self.checkQueryResults( 

1677 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1678 [dataset1, dataset2], 

1679 ) 

1680 self.checkQueryResults( 

1681 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1682 [dataset1], 

1683 ) 

1684 self.checkQueryResults( 

1685 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1686 [dataset2], 

1687 ) 

1688 with dataIds.materialize() as dataIds: 

1689 self.checkQueryResults(dataIds, [dataId]) 

1690 self.checkQueryResults( 

1691 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1692 [dataset1, dataset2], 

1693 ) 

1694 self.checkQueryResults( 

1695 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1696 [dataset1], 

1697 ) 

1698 self.checkQueryResults( 

1699 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1700 [dataset2], 

1701 ) 

1702 # Query for non-empty data IDs with a constraint on an empty-data-ID 

1703 # dataset that exists. 

1704 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...) 

1705 self.checkQueryResults( 

1706 dataIds.subset(unique=True), 

1707 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)], 

1708 ) 

1709 # Again query for non-empty data IDs with a constraint on empty-data-ID 

1710 # datasets, but when the datasets don't exist. We delete the existing 

1711 # dataset and query just that collection rather than creating a new 

1712 # empty collection because this is a bit less likely for our build-time 

1713 # logic to shortcut-out (via the collection summaries), and such a 

1714 # shortcut would make this test a bit more trivial than we'd like. 

1715 registry.removeDatasets([dataset2]) 

1716 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2) 

1717 self.checkQueryResults(dataIds, []) 

1718 

1719 def testDimensionDataModifications(self): 

1720 """Test that modifying dimension records via: 

1721 syncDimensionData(..., update=True) and 

1722 insertDimensionData(..., replace=True) works as expected, even in the 

1723 presence of datasets using those dimensions and spatial overlap 

1724 relationships. 

1725 """ 

1726 

1727 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1728 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1729 for begin, end in ranges: 

1730 yield from range(begin, end) 

1731 

1732 def range_set_hull( 

1733 ranges: lsst.sphgeom.RangeSet, 

1734 pixelization: lsst.sphgeom.HtmPixelization, 

1735 ) -> lsst.sphgeom.ConvexPolygon: 

1736 """Create a ConvexPolygon hull of the region defined by a set of 

1737 HTM pixelization index ranges. 

1738 """ 

1739 points = [] 

1740 for index in unpack_range_set(ranges): 

1741 points.extend(pixelization.triangle(index).getVertices()) 

1742 return lsst.sphgeom.ConvexPolygon(points) 

1743 

1744 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1745 # and four child regions (the trixels within the parent at the next 

1746 # level). We'll use the parent as a tract/visit region and the children 

1747 # as its patch/visit_detector regions. 

1748 registry = self.makeRegistry() 

1749 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1750 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1751 index = 12288 

1752 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 

1753 assert htm6.universe().contains(child_ranges_small) 

1754 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)] 

1755 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1756 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1757 ) 

1758 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1759 # Make a larger version of each child region, defined to be the set of 

1760 # htm6 trixels that overlap the original's bounding circle. Make a new 

1761 # parent that's the convex hull of the new children. 

1762 child_regions_large = [ 

1763 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1764 ] 

1765 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small)) 

1766 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1767 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1768 ) 

1769 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1770 assert parent_region_large.contains(parent_region_small) 

1771 assert not parent_region_small.contains(parent_region_large) 

1772 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1773 # Find some commonSkyPix indices that overlap the large regions but do 

1774 # not overlap the small regions. We use commonSkyPix here to make sure the 

1775 # real tests later involve what's in the database, not just post-query 

1776 # region filtering. 

1777 child_difference_indices = [] 

1778 for large, small in zip(child_regions_large, child_regions_small): 

1779 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1780 assert difference, "if this is empty, we can't test anything useful with these regions" 

1781 assert all( 

1782 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1783 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1784 for d in difference 

1785 ) 

1786 child_difference_indices.append(difference) 

1787 parent_difference_indices = list( 

1788 unpack_range_set( 

1789 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1790 ) 

1791 ) 

1792 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1793 assert all( 

1794 ( 

1795 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1796 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1797 ) 

1798 for d in parent_difference_indices 

1799 ) 

1800 # Now that we've finally got those regions, we'll insert the large ones 

1801 # as tract/patch dimension records. 

1802 skymap_name = "testing_v1" 

1803 registry.insertDimensionData( 

1804 "skymap", 

1805 { 

1806 "name": skymap_name, 

1807 "hash": bytes([42]), 

1808 "tract_max": 1, 

1809 "patch_nx_max": 2, 

1810 "patch_ny_max": 2, 

1811 }, 

1812 ) 

1813 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

1814 registry.insertDimensionData( 

1815 "patch", 

1816 *[ 

1817 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1818 for n, c in enumerate(child_regions_large) 

1819 ], 

1820 ) 

1821 # Add a dataset that uses these dimensions to make sure that modifying 

1822 # them doesn't disrupt foreign keys (we need to make sure the DB doesn't 

1823 # implement insert with replace=True as delete-then-insert). 

1824 dataset_type = DatasetType( 

1825 "coadd", 

1826 dimensions=["tract", "patch"], 

1827 universe=registry.dimensions, 

1828 storageClass="Exposure", 

1829 ) 

1830 registry.registerDatasetType(dataset_type) 

1831 registry.registerCollection("the_run", CollectionType.RUN) 

1832 registry.insertDatasets( 

1833 dataset_type, 

1834 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

1835 run="the_run", 

1836 ) 

1837 # Query for tracts and patches that overlap some "difference" commonSkyPix 

1838 # pixels; there should be overlaps, because the database has 

1839 # the "large" suite of regions. 

1840 self.assertEqual( 

1841 {0}, 

1842 { 

1843 data_id["tract"] 

1844 for data_id in registry.queryDataIds( 

1845 ["tract"], 

1846 skymap=skymap_name, 

1847 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1848 ) 

1849 }, 

1850 ) 

1851 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1852 self.assertIn( 

1853 patch_id, 

1854 { 

1855 data_id["patch"] 

1856 for data_id in registry.queryDataIds( 

1857 ["patch"], 

1858 skymap=skymap_name, 

1859 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1860 ) 

1861 }, 

1862 ) 

1863 # Use sync to update the tract region and insert to update the patch 

1864 # regions, to the "small" suite. 

1865 updated = registry.syncDimensionData( 

1866 "tract", 

1867 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

1868 update=True, 

1869 ) 

1870 self.assertEqual(updated, {"region": parent_region_large}) 

1871 registry.insertDimensionData( 

1872 "patch", 

1873 *[ 

1874 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1875 for n, c in enumerate(child_regions_small) 

1876 ], 

1877 replace=True, 

1878 ) 

1879 # Query again; there should now be no such overlaps, because the 

1880 # database has the "small" suite of regions. 

1881 self.assertFalse( 

1882 set( 

1883 registry.queryDataIds( 

1884 ["tract"], 

1885 skymap=skymap_name, 

1886 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1887 ) 

1888 ) 

1889 ) 

1890 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1891 self.assertNotIn( 

1892 patch_id, 

1893 { 

1894 data_id["patch"] 

1895 for data_id in registry.queryDataIds( 

1896 ["patch"], 

1897 skymap=skymap_name, 

1898 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1899 ) 

1900 }, 

1901 ) 

1902 # Update back to the large regions and query one more time. 

1903 updated = registry.syncDimensionData( 

1904 "tract", 

1905 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

1906 update=True, 

1907 ) 

1908 self.assertEqual(updated, {"region": parent_region_small}) 

1909 registry.insertDimensionData( 

1910 "patch", 

1911 *[ 

1912 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1913 for n, c in enumerate(child_regions_large) 

1914 ], 

1915 replace=True, 

1916 ) 

1917 self.assertEqual( 

1918 {0}, 

1919 { 

1920 data_id["tract"] 

1921 for data_id in registry.queryDataIds( 

1922 ["tract"], 

1923 skymap=skymap_name, 

1924 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1925 ) 

1926 }, 

1927 ) 

1928 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1929 self.assertIn( 

1930 patch_id, 

1931 { 

1932 data_id["patch"] 

1933 for data_id in registry.queryDataIds( 

1934 ["patch"], 

1935 skymap=skymap_name, 

1936 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1937 ) 

1938 }, 

1939 ) 

1940 

1941 def testCalibrationCollections(self): 

1942 """Test operations on `~CollectionType.CALIBRATION` collections, 

1943 including `Registry.certify`, `Registry.decertify`, and 

1944 `Registry.findDataset`. 

1945 """ 

1946 # Setup - make a Registry, fill it with some datasets in 

1947 # non-calibration collections. 

1948 registry = self.makeRegistry() 

1949 self.loadData(registry, "base.yaml") 

1950 self.loadData(registry, "datasets.yaml") 

1951 # Set up some timestamps. 

1952 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

1953 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

1954 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

1955 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

1956 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

1957 allTimespans = [ 

1958 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

1959 ] 

1960 # Get references to some datasets. 

1961 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

1962 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

1963 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

1964 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

1965 # Register the main calibration collection we'll be working with. 

1966 collection = "Cam1/calibs/default" 

1967 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

1968 # Cannot associate into a calibration collection (no timespan). 

1969 with self.assertRaises(CollectionTypeError): 

1970 registry.associate(collection, [bias2a]) 

1971 # Certify 2a dataset with [t2, t4) validity. 

1972 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

1973 # Test that we can query for this dataset via the new collection, both 

1974 # on its own and with a RUN collection, as long as we don't try to join 

1975 # in temporal dimensions or use findFirst=True. 

1976 self.assertEqual( 

1977 set(registry.queryDatasets("bias", findFirst=False, collections=collection)), 

1978 {bias2a}, 

1979 ) 

1980 self.assertEqual( 

1981 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])), 

1982 { 

1983 bias2a, 

1984 bias2b, 

1985 bias3b, 

1986 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1987 }, 

1988 ) 

1989 self.assertEqual( 

1990 set(registry.queryDataIds("detector", datasets="bias", collections=collection)), 

1991 {registry.expandDataId(instrument="Cam1", detector=2)}, 

1992 ) 

1993 self.assertEqual( 

1994 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])), 

1995 { 

1996 registry.expandDataId(instrument="Cam1", detector=2), 

1997 registry.expandDataId(instrument="Cam1", detector=3), 

1998 registry.expandDataId(instrument="Cam1", detector=4), 

1999 }, 

2000 ) 

2001 

2002 # We should not be able to certify 2b with anything overlapping that 

2003 # window. 

2004 with self.assertRaises(ConflictingDefinitionError): 

2005 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

2006 with self.assertRaises(ConflictingDefinitionError): 

2007 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

2008 with self.assertRaises(ConflictingDefinitionError): 

2009 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

2010 with self.assertRaises(ConflictingDefinitionError): 

2011 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

2012 with self.assertRaises(ConflictingDefinitionError): 

2013 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

2014 with self.assertRaises(ConflictingDefinitionError): 

2015 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

2016 with self.assertRaises(ConflictingDefinitionError): 

2017 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

2018 with self.assertRaises(ConflictingDefinitionError): 

2019 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

2020 # We should be able to certify 3a with a range overlapping that window, 

2021 # because it's for a different detector. 

2022 # We'll certify 3a over [t1, t3). 

2023 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

2024 # Now we'll certify 2b and 3b together over [t4, ∞). 

2025 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 
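
# The calibration collection now holds bias2a over [t2, t4), bias3a 

# over [t1, t3), and bias2b/bias3b over [t4, ∞). 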

2026 

2027 # Fetch all associations and check that they are what we expect. 

2028 self.assertCountEqual( 

2029 list( 

2030 registry.queryDatasetAssociations( 

2031 "bias", 

2032 collections=[collection, "imported_g", "imported_r"], 

2033 ) 

2034 ), 

2035 [ 

2036 DatasetAssociation( 

2037 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

2038 collection="imported_g", 

2039 timespan=None, 

2040 ), 

2041 DatasetAssociation( 

2042 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2043 collection="imported_r", 

2044 timespan=None, 

2045 ), 

2046 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

2047 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

2048 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

2049 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

2050 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

2051 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

2052 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2053 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2054 ], 

2055 ) 

2056 

2057 class Ambiguous: 

2058 """Tag class to denote lookups that should be ambiguous.""" 

2059 

2060 pass 

2061 

2062 def assertLookup( 

2063 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]] 

2064 ) -> None: 

2065 """Local function that asserts that a bias lookup returns the given 

2066 expected result. 

2067 """ 

2068 if expected is Ambiguous: 

2069 with self.assertRaises(RuntimeError): 

2070 registry.findDataset( 

2071 "bias", 

2072 collections=collection, 

2073 instrument="Cam1", 

2074 detector=detector, 

2075 timespan=timespan, 

2076 ) 

2077 else: 

2078 self.assertEqual( 

2079 expected, 

2080 registry.findDataset( 

2081 "bias", 

2082 collections=collection, 

2083 instrument="Cam1", 

2084 detector=detector, 

2085 timespan=timespan, 

2086 ), 

2087 ) 

2088 

2089 # Systematically test lookups against expected results. 
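
# A lookup should return None when the given window overlaps no 

# certification, the unique match when exactly one overlaps, and 

# Ambiguous when more than one does; e.g. (None, t5) for detector 2 

# overlaps both bias2a over [t2, t4) and bias2b over [t4, ∞). 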

2090 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2091 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2092 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2093 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2094 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

2095 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2096 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2097 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2098 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2099 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

2100 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2101 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2102 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2103 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

2104 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2105 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

2106 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

2107 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

2108 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

2109 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2110 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2111 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2112 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2113 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2114 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2115 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

2116 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2117 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2118 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2119 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2120 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

2121 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2122 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2123 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2124 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2125 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2126 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2127 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2128 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2129 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2130 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2131 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2132 

2133 # Decertify [t3, t5) for all data IDs, and run the test lookups again. 

2134 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2135 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2136 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2137 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2138 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2139 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2140 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2141 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2142 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2143 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2144 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2145 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2146 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2147 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2148 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2149 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2150 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2151 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2152 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2153 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2154 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2155 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2156 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2157 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2158 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2159 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2160 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2161 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2162 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2163 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2164 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2165 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2166 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2167 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2168 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2169 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2170 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2171 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2172 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2173 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2174 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2175 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2176 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2177 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2178 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2179 

2180 # Decertify everything, this time with explicit data IDs, then check 

2181 # that no lookups succeed. 

2182 registry.decertify( 

2183 collection, 

2184 "bias", 

2185 Timespan(None, None), 

2186 dataIds=[ 

2187 dict(instrument="Cam1", detector=2), 

2188 dict(instrument="Cam1", detector=3), 

2189 ], 

2190 ) 

2191 for detector in (2, 3): 

2192 for timespan in allTimespans: 

2193 assertLookup(detector=detector, timespan=timespan, expected=None) 

2194 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2195 # those. 

2196 registry.certify( 

2197 collection, 

2198 [bias2a, bias3a], 

2199 Timespan(None, None), 

2200 ) 

2201 for timespan in allTimespans: 

2202 assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2203 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2204 # Decertify just bias2a over [t2, t4). 

2205 # This should split a single certification row into two (and leave the 

2206 # other existing row, for bias3a, alone). 

2207 registry.decertify( 

2208 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2209 ) 

2210 for timespan in allTimespans: 

2211 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2212 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2213 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2214 if overlapsBefore and overlapsAfter: 

2215 expected = Ambiguous 

2216 elif overlapsBefore or overlapsAfter: 

2217 expected = bias2a 

2218 else: 

2219 expected = None 

2220 assertLookup(detector=2, timespan=timespan, expected=expected) 

2221 

2222 def testSkipCalibs(self): 

2223 """Test how queries handle skipping of calibration collections.""" 

2224 registry = self.makeRegistry() 

2225 self.loadData(registry, "base.yaml") 

2226 self.loadData(registry, "datasets.yaml") 

2227 

2228 coll_calib = "Cam1/calibs/default" 

2229 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2230 

2231 # Add all biases to the calibration collection. 

2232 # Without this, the logic that prunes dataset subqueries based on 

2233 # datasetType-collection summary information will fire before the logic 

2234 # we want to test below. This is a good thing (it avoids the dreaded 

2235 # NotImplementedError a bit more often) everywhere but here. 

2236 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2237 

2238 coll_list = [coll_calib, "imported_g", "imported_r"] 

2239 chain = "Cam1/chain" 

2240 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2241 registry.setCollectionChain(chain, coll_list) 

2242 

2243 # explicit list will raise if findFirst=True or there are temporal 

2244 # dimensions 

2245 with self.assertRaises(NotImplementedError): 

2246 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2247 with self.assertRaises(NotImplementedError): 

2248 registry.queryDataIds( 

2249 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2250 ).count() 

2251 

2252 # A chained collection skips the calibration collection instead of raising. 

2253 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2254 self.assertGreater(len(datasets), 0) 

2255 

2256 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2257 self.assertGreater(len(dataIds), 0) 

2258 

2259 # glob will skip too 

2260 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2261 self.assertGreater(len(datasets), 0) 

2262 

2263 # regular expression will skip too 

2264 pattern = re.compile(".*") 

2265 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2266 self.assertGreater(len(datasets), 0) 

2267 

2268 # ellipsis should work as usual 

2269 datasets = list(registry.queryDatasets("bias", collections=...)) 

2270 self.assertGreater(len(datasets), 0) 

2271 

2272 # a few tests with findFirst 

2273 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2274 self.assertGreater(len(datasets), 0) 

2275 

2276 def testIngestTimeQuery(self): 
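
"""Test ``where`` expressions that constrain ``ingest_date``, using 

both literal timestamps and ``bind`` parameters. 

""" 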

2277 

2278 registry = self.makeRegistry() 

2279 self.loadData(registry, "base.yaml") 

2280 dt0 = datetime.utcnow() 

2281 self.loadData(registry, "datasets.yaml") 

2282 dt1 = datetime.utcnow() 

2283 

2284 datasets = list(registry.queryDatasets(..., collections=...)) 

2285 len0 = len(datasets) 

2286 self.assertGreater(len0, 0) 

2287 

2288 where = "ingest_date > T'2000-01-01'" 

2289 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2290 len1 = len(datasets) 

2291 self.assertEqual(len0, len1) 

2292 

2293 # no one will ever use this piece of software in 30 years 

2294 where = "ingest_date > T'2050-01-01'" 

2295 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2296 len2 = len(datasets) 

2297 self.assertEqual(len2, 0) 

2298 

2299 # Check more exact timing to make sure there is no 37-second offset 

2300 # (after fixing DM-30124). SQLite time precision is 1 second; make 

2301 # sure that we don't test with higher precision. 

2302 tests = [ 

2303 # format: (timestamp, operator, expected_len) 

2304 (dt0 - timedelta(seconds=1), ">", len0), 

2305 (dt0 - timedelta(seconds=1), "<", 0), 

2306 (dt1 + timedelta(seconds=1), "<", len0), 

2307 (dt1 + timedelta(seconds=1), ">", 0), 

2308 ] 

2309 for dt, op, expect_len in tests: 

2310 dt_str = dt.isoformat(sep=" ") 

2311 

2312 where = f"ingest_date {op} T'{dt_str}'" 

2313 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2314 self.assertEqual(len(datasets), expect_len) 

2315 

2316 # same with bind using datetime or astropy Time 

2317 where = f"ingest_date {op} ingest_time" 

2318 datasets = list( 

2319 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2320 ) 

2321 self.assertEqual(len(datasets), expect_len) 

2322 

2323 dt_astropy = astropy.time.Time(dt, format="datetime") 

2324 datasets = list( 

2325 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2326 ) 

2327 self.assertEqual(len(datasets), expect_len) 

2328 

2329 def testTimespanQueries(self): 

2330 """Test query expressions involving timespans.""" 

2331 registry = self.makeRegistry() 

2332 self.loadData(registry, "hsc-rc2-subset.yaml") 

2333 # All visits in the database; mapping from ID to timespan. 

2334 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2335 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2336 # visit IDs are monotonically increasing). 

2337 ids = sorted(visits.keys()) 

2338 self.assertGreater(len(ids), 20) 

2339 # Pick some quasi-random indexes into `ids` to play with. 

2340 i1 = int(len(ids) * 0.1) 

2341 i2 = int(len(ids) * 0.3) 

2342 i3 = int(len(ids) * 0.6) 

2343 i4 = int(len(ids) * 0.8) 

2344 # Extract some times from those: just before the beginning of i1 (which 

2345 # should be after the end of the previous visit), exactly the 

2346 # beginning of i2, just after the beginning of i3 (and before its end), 

2347 # and the exact end of i4. 

2348 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2349 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2350 t2 = visits[ids[i2]].begin 

2351 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2352 self.assertLess(t3, visits[ids[i3]].end) 

2353 t4 = visits[ids[i4]].end 

2354 # Make sure those are actually in order. 

2355 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 
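
# As a quick sanity check of the bound conventions these queries rely 

# on (begins are inclusive, ends are exclusive): spans that share 

# interior time overlap, while merely adjacent ones do not. A minimal 

# illustration using only Timespan.overlaps: 

self.assertTrue(Timespan(t1, t3).overlaps(Timespan(t2, t4))) 

self.assertFalse(Timespan(t1, t2).overlaps(Timespan(t2, t3))) 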

2356 

2357 bind = { 

2358 "t1": t1, 

2359 "t2": t2, 

2360 "t3": t3, 

2361 "t4": t4, 

2362 "ts23": Timespan(t2, t3), 

2363 } 

2364 

2365 def query(where): 

2366 """Helper function that queries for visit data IDs and returns 

2367 results as a sorted, deduplicated list of visit IDs. 

2368 """ 

2369 return sorted( 

2370 { 

2371 dataId["visit"] 

2372 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2373 } 

2374 ) 

2375 

2376 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2377 # where they appear in the expression, and how we get the timespan into 

2378 # the expression. 

2379 

2380 # t1 is before the start of i1, so this should not include i1. 

2381 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2382 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2383 # should not include i2. 

2384 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2385 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2386 # t3 is in the middle of i3, so this should include i3. 

2387 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2388 # This one should not include i3, by the same reasoning. 

2389 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2390 # t4 is exactly at the end of i4, so this should include i4. 

2391 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2392 # i4's upper bound of t4 is exclusive, so this should not include i4. 

2393 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2394 

2395 # Now some timespan vs. time scalar queries. 

2396 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2397 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2398 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2399 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2400 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2401 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2402 

2403 # Empty timespans should not overlap anything. 

2404 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 

2405 

2406 def testCollectionSummaries(self): 

2407 """Test recording and retrieval of collection summaries.""" 

2408 self.maxDiff = None 

2409 registry = self.makeRegistry() 

2410 # Importing datasets from yaml should go through the code path where 

2411 # we update collection summaries as we insert datasets. 

2412 self.loadData(registry, "base.yaml") 

2413 self.loadData(registry, "datasets.yaml") 

2414 flat = registry.getDatasetType("flat") 

2415 expected1 = CollectionSummary() 

2416 expected1.dataset_types.add(registry.getDatasetType("bias")) 

2417 expected1.add_data_ids( 

2418 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)] 

2419 ) 

2420 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2421 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2422 # Create a chained collection with both of the imported runs; the 

2423 # summary should be the same, because it's a union with itself. 

2424 chain = "chain" 

2425 registry.registerCollection(chain, CollectionType.CHAINED) 

2426 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2427 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2428 # Associate flats only into a tagged collection and a calibration 

2429 # collection to check summaries of those. 

2430 tag = "tag" 

2431 registry.registerCollection(tag, CollectionType.TAGGED) 

2432 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2433 calibs = "calibs" 

2434 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2435 registry.certify( 

2436 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2437 ) 

2438 expected2 = expected1.copy() 

2439 expected2.dataset_types.discard("bias") 

2440 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2441 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2442 # Explicitly calling Registry.refresh() should load those same 

2443 # summaries, via a totally different code path. 

2444 registry.refresh() 

2445 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2446 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2447 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2448 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2449 
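# --- Illustrative sketch (not part of the original test file) ---
# Why the chain's summary equals expected1 above: the summary of a
# CHAINED collection is the union of its children's summaries, and a
# union of identical summaries is unchanged.  Modeled here with plain
# sets of dataset type names (the real CollectionSummary also tracks
# governor dimension values):
def _chain_summary(child_summaries):
    result = set()
    for summary in child_summaries:
        result |= summary
    return result

imported_g = {"bias", "flat"}
imported_r = {"bias", "flat"}
assert _chain_summary([imported_r, imported_g]) == imported_g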

2450 def testBindInQueryDatasets(self): 

2451 """Test that the bind parameter is correctly forwarded in 

2452 queryDatasets recursion. 

2453 """ 

2454 registry = self.makeRegistry() 

2455 # Load the standard test data; this test only needs some datasets 

2456 # to run queries against. 

2457 self.loadData(registry, "base.yaml") 

2458 self.loadData(registry, "datasets.yaml") 

2459 self.assertEqual( 

2460 set(registry.queryDatasets("flat", band="r", collections=...)), 

2461 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2462 ) 

2463 
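# --- Illustrative sketch (not part of the original test file) ---
# A toy model (not the real expression parser) of what `bind` does:
# identifiers in the `where` string that appear in the bind mapping are
# replaced by their values before the expression is evaluated.
import re as _re

def _substitute_bind(expression, bind):
    def _repl(match):
        name = match.group(0)
        if name in bind:
            value = bind[name]
            return f"'{value}'" if isinstance(value, str) else str(value)
        return name
    return _re.sub(r"[A-Za-z_][A-Za-z0-9_]*", _repl, expression)

assert _substitute_bind("band=my_band", {"my_band": "r"}) == "band='r'"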

2464 def testQueryResultSummaries(self): 

2465 """Test summary methods like `count`, `any`, and `explain_no_results` 

2466 on `DataCoordinateQueryResults` and `DatasetQueryResults`. 

2467 """ 

2468 registry = self.makeRegistry() 

2469 self.loadData(registry, "base.yaml") 

2470 self.loadData(registry, "datasets.yaml") 

2471 self.loadData(registry, "spatial.yaml") 

2472 # Default test dataset has two collections, each with both flats and 

2473 # biases. Add a new collection with only biases. 

2474 registry.registerCollection("biases", CollectionType.TAGGED) 

2475 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2476 # First query yields two results, and involves no postprocessing. 

2477 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2478 self.assertTrue(query1.any(execute=False, exact=False)) 

2479 self.assertTrue(query1.any(execute=True, exact=False)) 

2480 self.assertTrue(query1.any(execute=True, exact=True)) 

2481 self.assertEqual(query1.count(exact=False), 2) 

2482 self.assertEqual(query1.count(exact=True), 2) 

2483 self.assertFalse(list(query1.explain_no_results())) 

2484 # Second query should yield no results, but this isn't detectable 

2485 # unless we actually run a query. 

2486 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2487 self.assertTrue(query2.any(execute=False, exact=False)) 

2488 self.assertFalse(query2.any(execute=True, exact=False)) 

2489 self.assertFalse(query2.any(execute=True, exact=True)) 

2490 self.assertEqual(query2.count(exact=False), 0) 

2491 self.assertEqual(query2.count(exact=True), 0) 

2492 self.assertFalse(list(query2.explain_no_results())) 

2493 # These queries yield no results due to various problems that can be 

2494 # spotted prior to execution, yielding helpful diagnostics. 

2495 base_query = registry.queryDataIds(["detector", "physical_filter"]) 

2496 queries_and_snippets = [ 

2497 ( 

2498 # Dataset type name doesn't match any existing dataset types. 

2499 registry.queryDatasets("nonexistent", collections=...), 

2500 ["nonexistent"], 

2501 ), 

2502 ( 

2503 # Dataset type object isn't registered. 

2504 registry.queryDatasets( 

2505 DatasetType( 

2506 "nonexistent", 

2507 dimensions=["instrument"], 

2508 universe=registry.dimensions, 

2509 storageClass="Image", 

2510 ), 

2511 collections=..., 

2512 ), 

2513 ["nonexistent"], 

2514 ), 

2515 ( 

2516 # No datasets of this type in this collection. 

2517 registry.queryDatasets("flat", collections=["biases"]), 

2518 ["flat", "biases"], 

2519 ), 

2520 ( 

2521 # No datasets of this type in this collection. 

2522 base_query.findDatasets("flat", collections=["biases"]), 

2523 ["flat", "biases"], 

2524 ), 

2525 ( 

2526 # No collections matching at all. 

2527 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2528 ["potato"], 

2529 ), 

2530 ] 

2531 # The behavior of these additional queries is slated to change in the 

2532 # future, so we also check for deprecation warnings. 

2533 with self.assertWarns(FutureWarning): 

2534 queries_and_snippets.append( 

2535 ( 

2536 # Dataset type name doesn't match any existing dataset 

2537 # types. 

2538 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...), 

2539 ["nonexistent"], 

2540 ) 

2541 ) 

2542 with self.assertWarns(FutureWarning): 

2543 queries_and_snippets.append( 

2544 ( 

2545 # Dataset type name doesn't match any existing dataset 

2546 # types. 

2547 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...), 

2548 ["nonexistent"], 

2549 ) 

2550 ) 

2551 for query, snippets in queries_and_snippets: 

2552 self.assertFalse(query.any(execute=False, exact=False)) 

2553 self.assertFalse(query.any(execute=True, exact=False)) 

2554 self.assertFalse(query.any(execute=True, exact=True)) 

2555 self.assertEqual(query.count(exact=False), 0) 

2556 self.assertEqual(query.count(exact=True), 0) 

2557 messages = list(query.explain_no_results()) 

2558 self.assertTrue(messages) 

2559 # Want all expected snippets to appear in at least one message. 

2560 self.assertTrue( 

2561 any( 

2562 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2563 ), 

2564 messages, 

2565 ) 

2566 

2567 # This query does yield results, but it should also emit a warning, 

2568 # because passing dataset type patterns to queryDataIds is 

2569 # deprecated; just check for the warning. 

2570 with self.assertWarns(FutureWarning): 

2571 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...) 

2572 

2573 # These queries yield no results due to problems that can be identified 

2574 # by cheap follow-up queries, yielding helpful diagnostics. 

2575 for query, snippets in [ 

2576 ( 

2577 # No records for one of the involved dimensions. 

2578 registry.queryDataIds(["subfilter"]), 

2579 ["dimension records", "subfilter"], 

2580 ), 

2581 ( 

2582 # No records for one of the involved dimensions. 

2583 registry.queryDimensionRecords("subfilter"), 

2584 ["dimension records", "subfilter"], 

2585 ), 

2586 ]: 

2587 self.assertFalse(query.any(execute=True, exact=False)) 

2588 self.assertFalse(query.any(execute=True, exact=True)) 

2589 self.assertEqual(query.count(exact=True), 0) 

2590 messages = list(query.explain_no_results()) 

2591 self.assertTrue(messages) 

2592 # Want all expected snippets to appear in at least one message. 

2593 self.assertTrue( 

2594 any( 

2595 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2596 ), 

2597 messages, 

2598 ) 

2599 

2600 # This query yields four overlaps in the database, but one is filtered 

2601 # out in postprocessing. The count queries aren't accurate because 

2602 # they don't account for duplication that happens due to an internal 

2603 # join against commonSkyPix. 

2604 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2605 self.assertEqual( 

2606 { 

2607 DataCoordinate.standardize( 

2608 instrument="Cam1", 

2609 skymap="SkyMap1", 

2610 visit=v, 

2611 tract=t, 

2612 universe=registry.dimensions, 

2613 ) 

2614 for v, t in [(1, 0), (2, 0), (2, 1)] 

2615 }, 

2616 set(query3), 

2617 ) 

2618 self.assertTrue(query3.any(execute=False, exact=False)) 

2619 self.assertTrue(query3.any(execute=True, exact=False)) 

2620 self.assertTrue(query3.any(execute=True, exact=True)) 

2621 self.assertGreaterEqual(query3.count(exact=False), 4) 

2622 self.assertGreaterEqual(query3.count(exact=True), 3) 

2623 self.assertFalse(list(query3.explain_no_results())) 

2624 # This query yields overlaps in the database, but all are filtered 

2625 # out in postprocessing. The count queries again aren't very useful. 

2626 # We have to use `where=` here to avoid an optimization that 

2627 # (currently) skips the spatial postprocess-filtering because it 

2628 # recognizes that no spatial join is necessary. That's not ideal, but 

2629 # fixing it is out of scope for this ticket. 

2630 query4 = registry.queryDataIds( 

2631 ["visit", "tract"], 

2632 instrument="Cam1", 

2633 skymap="SkyMap1", 

2634 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2635 ) 

2636 self.assertFalse(set(query4)) 

2637 self.assertTrue(query4.any(execute=False, exact=False)) 

2638 self.assertTrue(query4.any(execute=True, exact=False)) 

2639 self.assertFalse(query4.any(execute=True, exact=True)) 

2640 self.assertGreaterEqual(query4.count(exact=False), 1) 

2641 self.assertEqual(query4.count(exact=True), 0) 

2642 messages = list(query4.explain_no_results()) 

2643 self.assertTrue(messages) 

2644 self.assertTrue(any("regions did not overlap" in message for message in messages)) 

2645 

2646 # And there are cases where queries return empty results but we do 

2647 # not yet know how to explain that (could we just say miracles happen?). 

2648 query5 = registry.queryDimensionRecords( 

2649 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1" 

2650 ) 

2651 self.assertEqual(query5.count(exact=True), 0) 

2652 messages = list(query5.explain_no_results()) 

2653 self.assertFalse(messages) 

2654 
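# --- Illustrative sketch (not part of the original test file) ---
# The count()/any() contract exercised above, modeled with plain lists:
# exact=False may overcount because it counts raw database rows, while
# exact=True counts only rows that survive postprocessing (such as the
# region-overlap filtering in query3 and query4).
rows_from_database = ["a", "b", "c", "d"]  # raw SQL results
survives_postprocessing = {"a", "b", "c"}  # e.g. regions really overlap
inexact_count = len(rows_from_database)
exact_count = sum(1 for row in rows_from_database if row in survives_postprocessing)
assert inexact_count >= exact_count
assert (inexact_count > 0) and (exact_count > 0)  # any() analogues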

2655 def testQueryDataIdsOrderBy(self): 

2656 """Test order_by and limit on result returned by queryDataIds().""" 

2657 registry = self.makeRegistry() 

2658 self.loadData(registry, "base.yaml") 

2659 self.loadData(registry, "datasets.yaml") 

2660 self.loadData(registry, "spatial.yaml") 

2661 

2662 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

2663 return registry.queryDataIds( 

2664 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

2665 ) 

2666 

2667 Test = namedtuple( 

2668 "testQueryDataIdsOrderByTest", 

2669 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

2670 defaults=(None, None, None), 

2671 ) 

2672 

2673 test_data = ( 

2674 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2675 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

2676 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

2677 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

2678 Test( 

2679 "tract.id,visit.id", 

2680 "tract,visit", 

2681 ((0, 1), (0, 1), (0, 2)), 

2682 limit=(3,), 

2683 ), 

2684 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

2685 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

2686 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

2687 Test( 

2688 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

2689 ), 

2690 Test( 

2691 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

2692 ), 

2693 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2694 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2695 Test( 

2696 "tract,-timespan.begin,timespan.end", 

2697 "tract,visit", 

2698 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

2699 ), 

2700 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

2701 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

2702 Test( 

2703 "tract,detector", 

2704 "tract,detector", 

2705 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2706 datasets="flat", 

2707 collections="imported_r", 

2708 ), 

2709 Test( 

2710 "tract,detector.full_name", 

2711 "tract,detector", 

2712 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2713 datasets="flat", 

2714 collections="imported_r", 

2715 ), 

2716 Test( 

2717 "tract,detector.raft,detector.name_in_raft", 

2718 "tract,detector", 

2719 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2720 datasets="flat", 

2721 collections="imported_r", 

2722 ), 

2723 ) 

2724 

2725 for test in test_data: 

2726 order_by = test.order_by.split(",") 

2727 keys = test.keys.split(",") 

2728 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

2729 if test.limit is not None: 

2730 query = query.limit(*test.limit) 

2731 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

2732 self.assertEqual(dataIds, test.result) 

2733 

2734 # and materialize 

2735 query = do_query(keys).order_by(*order_by) 

2736 if test.limit is not None: 

2737 query = query.limit(*test.limit) 

2738 with query.materialize() as materialized: 

2739 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in materialized) 

2740 self.assertEqual(dataIds, test.result) 

2741 

2742 # errors in a name 

2743 for order_by in ("", "-"): 

2744 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2745 list(do_query().order_by(order_by)) 

2746 

2747 for order_by in ("undimension.name", "-undimension.name"): 

2748 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"): 

2749 list(do_query().order_by(order_by)) 

2750 

2751 for order_by in ("attract", "-attract"): 

2752 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

2753 list(do_query().order_by(order_by)) 

2754 

2755 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

2756 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

2757 

2758 with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimension"): 

2759 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

2760 

2761 with self.assertRaisesRegex( 

2762 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

2763 ): 

2764 list(do_query(("tract")).order_by("timespan.begin")) 

2765 

2766 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

2767 list(do_query(("tract")).order_by("tract.timespan.begin")) 

2768 

2769 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

2770 list(do_query(("tract")).order_by("tract.name")) 

2771 
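# --- Illustrative sketch (not part of the original test file) ---
# A toy parser (hypothetical; the registry's real implementation is
# more involved) for the ORDER BY term syntax used above: a leading "-"
# means descending, and a term is "dimension", "dimension.field", or a
# bare metadata field name resolved against the query's dimensions.
def _parse_order_by_term(term):
    descending = term.startswith("-")
    if descending:
        term = term[1:]
    if not term:
        raise ValueError("Empty dimension name in ORDER BY")
    element, _, field = term.partition(".")
    return descending, element, field or None

assert _parse_order_by_term("-visit.name") == (True, "visit", "name")
assert _parse_order_by_term("tract") == (False, "tract", None)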

2772 def testQueryDataIdsGovernorExceptions(self): 

2773 """Test exceptions raised by queryDataIds() for incorrect governors.""" 

2774 registry = self.makeRegistry() 

2775 self.loadData(registry, "base.yaml") 

2776 self.loadData(registry, "datasets.yaml") 

2777 self.loadData(registry, "spatial.yaml") 

2778 

2779 def do_query(dimensions, dataId=None, where=None, bind=None, **kwargs): 

2780 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs) 

2781 

2782 Test = namedtuple( 

2783 "testQueryDataIdExceptionsTest", 

2784 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"), 

2785 defaults=(None, None, None, {}, None, 0), 

2786 ) 

2787 

2788 test_data = ( 

2789 Test("tract,visit", count=6), 

2790 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2791 Test( 

2792 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError 

2793 ), 

2794 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2795 Test( 

2796 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError 

2797 ), 

2798 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6), 

2799 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError), 

2800 Test( 

2801 "tract,visit", 

2802 where="instrument=cam AND skymap=map", 

2803 bind={"cam": "Cam1", "map": "SkyMap1"}, 

2804 count=6, 

2805 ), 

2806 Test( 

2807 "tract,visit", 

2808 where="instrument=cam AND skymap=map", 

2809 bind={"cam": "Cam", "map": "SkyMap"}, 

2810 exception=DataIdValueError, 

2811 ), 

2812 ) 

2813 

2814 for test in test_data: 

2815 dimensions = test.dimensions.split(",") 

2816 if test.exception: 

2817 with self.assertRaises(test.exception): 

2818 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() 

2819 else: 

2820 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2821 self.assertEqual(query.count(), test.count) 

2822 

2823 # and materialize 

2824 if test.exception: 

2825 with self.assertRaises(test.exception): 

2826 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2827 with query.materialize() as materialized: 

2828 materialized.count() 

2829 else: 

2830 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2831 with query.materialize() as materialized: 

2832 self.assertEqual(materialized.count(), test.count) 

2833 
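# --- Illustrative sketch (not part of the original test file) ---
# The DataIdValueError cases above come from validating governor
# dimension values (instrument, skymap) against the known records
# before the query runs.  A plain-Python stand-in for that check:
_known_governors = {"instrument": {"Cam1"}, "skymap": {"SkyMap1"}}

def _check_governors(data_id):
    for name, value in data_id.items():
        if name in _known_governors and value not in _known_governors[name]:
            raise ValueError(f"Unknown values specified for governor dimension {name}: {value!r}")

_check_governors({"instrument": "Cam1", "skymap": "SkyMap1"})  # passes
try:
    _check_governors({"instrument": "Cam2"})
except ValueError:
    pass  # mirrors the DataIdValueError expected above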

2834 def testQueryDimensionRecordsOrderBy(self): 

2835 """Test order_by and limit on result returned by 

2836 queryDimensionRecords(). 

2837 """ 

2838 registry = self.makeRegistry() 

2839 self.loadData(registry, "base.yaml") 

2840 self.loadData(registry, "datasets.yaml") 

2841 self.loadData(registry, "spatial.yaml") 

2842 

2843 def do_query(element, datasets=None, collections=None): 

2844 return registry.queryDimensionRecords( 

2845 element, instrument="Cam1", datasets=datasets, collections=collections 

2846 ) 

2847 

2848 query = do_query("detector") 

2849 self.assertEqual(len(list(query)), 4) 

2850 

2851 Test = namedtuple( 

2852 "testQueryDataIdsOrderByTest", 

2853 ("element", "order_by", "result", "limit", "datasets", "collections"), 

2854 defaults=(None, None, None), 

2855 ) 

2856 

2857 test_data = ( 

2858 Test("detector", "detector", (1, 2, 3, 4)), 

2859 Test("detector", "-detector", (4, 3, 2, 1)), 

2860 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)), 

2861 Test("detector", "-detector.purpose", (4,), limit=(1,)), 

2862 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)), 

2863 Test("visit", "visit", (1, 2)), 

2864 Test("visit", "-visit.id", (2, 1)), 

2865 Test("visit", "zenith_angle", (1, 2)), 

2866 Test("visit", "-visit.name", (2, 1)), 

2867 Test("visit", "day_obs,-timespan.begin", (2, 1)), 

2868 ) 

2869 

2870 for test in test_data: 

2871 order_by = test.order_by.split(",") 

2872 query = do_query(test.element).order_by(*order_by) 

2873 if test.limit is not None: 

2874 query = query.limit(*test.limit) 

2875 dataIds = tuple(rec.id for rec in query) 

2876 self.assertEqual(dataIds, test.result) 

2877 

2878 # errors in a name 

2879 for order_by in ("", "-"): 

2880 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2881 list(do_query("detector").order_by(order_by)) 

2882 

2883 for order_by in ("undimension.name", "-undimension.name"): 

2884 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"): 

2885 list(do_query("detector").order_by(order_by)) 

2886 

2887 for order_by in ("attract", "-attract"): 

2888 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."): 

2889 list(do_query("detector").order_by(order_by)) 

2890 
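# --- Illustrative sketch (not part of the original test file) ---
# The limit(limit, offset) semantics assumed by the test tables above,
# modeled with list slicing on the tract/visit ordering from
# testQueryDataIdsOrderBy (full ascending order, then limit=(3, 3)):
ordered_rows = [(0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2)]
limit, offset = 3, 3
assert ordered_rows[offset : offset + limit] == [(0, 2), (1, 2), (1, 2)]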

2891 def testQueryDimensionRecordsExceptions(self): 

2892 """Test exceptions raised by queryDimensionRecords().""" 

2893 registry = self.makeRegistry() 

2894 self.loadData(registry, "base.yaml") 

2895 self.loadData(registry, "datasets.yaml") 

2896 self.loadData(registry, "spatial.yaml") 

2897 

2898 result = registry.queryDimensionRecords("detector") 

2899 self.assertEqual(result.count(), 4) 

2900 result = registry.queryDimensionRecords("detector", instrument="Cam1") 

2901 self.assertEqual(result.count(), 4) 

2902 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"}) 

2903 self.assertEqual(result.count(), 4) 

2904 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'") 

2905 self.assertEqual(result.count(), 4) 

2906 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"}) 

2907 self.assertEqual(result.count(), 4) 

2908 

2909 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

2910 result = registry.queryDimensionRecords("detector", instrument="NotCam1") 

2911 result.count() 

2912 

2913 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

2914 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) 

2915 result.count() 

2916 

2917 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

2918 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") 

2919 result.count() 

2920 

2921 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

2922 result = registry.queryDimensionRecords( 

2923 "detector", where="instrument=instr", bind={"instr": "NotCam1"} 

2924 ) 

2925 result.count() 

2926 

2927 def testDatasetConstrainedDimensionRecordQueries(self): 

2928 """Test that queryDimensionRecords works even when given a dataset 

2929 constraint whose dimensions extend beyond the requested dimension 

2930 element's. 

2931 """ 

2932 registry = self.makeRegistry() 

2933 self.loadData(registry, "base.yaml") 

2934 self.loadData(registry, "datasets.yaml") 

2935 # Query for physical_filter dimension records, using a dataset type 

2936 # (flat) whose dimensions extend beyond physical_filter's. 

2937 records = registry.queryDimensionRecords( 

2938 "physical_filter", 

2939 datasets=["flat"], 

2940 collections="imported_r", 

2941 ) 

2942 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"}) 

2943 # Trying to constrain by all dataset types is an error. 

2944 with self.assertRaises(TypeError): 

2945 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r")) 

2946 
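# --- Illustrative sketch (not part of the original test file) ---
# Conceptually, the dataset-constrained record query above projects the
# dataset's wider data IDs down to the requested element's key before
# selecting records.  Toy data matching the expected result:
flat_data_ids = [
    {"detector": 1, "physical_filter": "Cam1-R1"},
    {"detector": 2, "physical_filter": "Cam1-R1"},
    {"detector": 3, "physical_filter": "Cam1-R2"},
]
assert {d["physical_filter"] for d in flat_data_ids} == {"Cam1-R1", "Cam1-R2"}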

2947 def testSkyPixDatasetQueries(self): 

2948 """Test that we can build queries involving skypix dimensions as long 

2949 as a dataset type that uses those dimensions is included. 

2950 """ 

2951 registry = self.makeRegistry() 

2952 self.loadData(registry, "base.yaml") 

2953 dataset_type = DatasetType( 

2954 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int" 

2955 ) 

2956 registry.registerDatasetType(dataset_type) 

2957 run = "r" 

2958 registry.registerRun(run) 

2959 # First try queries where there are no datasets; the concern is whether 

2960 # we can even build and execute these queries without raising, even 

2961 # when "doomed" query shortcuts are in play. 

2962 self.assertFalse( 

2963 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)) 

2964 ) 

2965 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run))) 

2966 # Now add a dataset and see that we can get it back. 

2967 htm7 = registry.dimensions.skypix["htm"][7].pixelization 

2968 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0]) 

2969 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run) 

2970 self.assertEqual( 

2971 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)), 

2972 {data_id}, 

2973 ) 

2974 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref}) 

2975 
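# --- Illustrative sketch (not part of the original test file) ---
# Where the htm7 data-ID value above comes from: lsst.sphgeom (imported
# at the top of this module) provides the pixelization, and universe()
# returns the RangeSet of all valid pixel indices; the test takes the
# first index of the first range.  Shown standalone, assuming only the
# lsst.sphgeom API the test itself uses:
import lsst.sphgeom

pixelization = lsst.sphgeom.HtmPixelization(7)
begin, end = pixelization.universe()[0]  # first (begin, end) index range
first_pixel = begin                      # any index in [begin, end) is valid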

2976 def testDatasetIdFactory(self): 

2977 """Simple test for DatasetIdFactory, mostly to catch potential changes 

2978 in its API. 

2979 """ 

2980 registry = self.makeRegistry() 

2981 factory = registry.datasetIdFactory 

2982 dataset_type = DatasetType( 

2983 "datasetType", 

2984 dimensions=["detector", "instrument"], 

2985 universe=registry.dimensions, 

2986 storageClass="int", 

2987 ) 

2988 run = "run" 

2989 data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions) 

2990 

2991 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE) 

2992 self.assertIsInstance(datasetId, uuid.UUID) 

2993 self.assertEqual(datasetId.version, 4) 

2994 

2995 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE) 

2996 self.assertIsInstance(datasetId, uuid.UUID) 

2997 self.assertEqual(datasetId.version, 5) 

2998 

2999 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN) 

3000 self.assertIsInstance(datasetId, uuid.UUID) 

3001 self.assertEqual(datasetId.version, 5)
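# --- Illustrative sketch (not part of the original test file) ---
# Background for the version checks above, using only the standard
# library: UNIQUE-style IDs are random (uuid4, version 4), while the
# DATAID_TYPE* modes are deterministic name-based IDs (uuid5, version
# 5), so identical inputs reproduce the same ID.  The namespace and
# name strings below are hypothetical, not what DatasetIdFactory uses.
import uuid

_NS = uuid.UUID("00000000-0000-0000-0000-000000000000")
random_id = uuid.uuid4()
deterministic_id = uuid.uuid5(_NS, "datasetType/Cam1/detector=1/run")
assert random_id.version == 4
assert deterministic_id.version == 5
assert deterministic_id == uuid.uuid5(_NS, "datasetType/Cam1/detector=1/run")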