# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    Timespan,
    ddl,
)
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    InconsistentDataIdError,
    MissingCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum
from ..summaries import CollectionSummary

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class; if a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class; if a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        Returned instance will be pre-configured based on the values of class
        members, and default-configured for all other parameters. Subclasses
        that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self, share_repo_with: Optional[Registry] = None) -> Optional[Registry]:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with` is
            not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters. SQLite documents the limit as 32k, but in
        # practice it appears to be much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
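        # (Presumably the query machinery splits long IN lists into batches of
        # the 1k size referenced below, which keeps each individual query
        # under the backend's bound-parameter limit.)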

        # Two IN clauses, each longer than 1k batch size, first with
        # duplicates, second has matching elements in different batches (after
        # sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing same DatasetRef with different dataset ID is an error
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs, they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):

                # Use integer dataset ID to force UUID calculation in _import
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                (ref1,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
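                # Version 5 UUIDs are name-based and deterministic: the same
                # dataset type and data ID (plus run, for DATAID_TYPE_RUN)
                # always map to the same ID, which is what makes the
                # re-imports below idempotent.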

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)

    def testImportDatasetsInt(self):
        """Test for `Registry._importDatasets` with integer dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManager"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}
        dataset_id = 999999999

        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run=run)
        (ref1,) = registry._importDatasets([ref])
        # Should make new integer ID.
        self.assertNotEqual(ref1.id, ref.id)

        # Ingesting same dataId with different dataset ID is an error
        ref2 = ref1.unresolved().resolved(dataset_id, run=run)
        with self.assertRaises(ConflictingDefinitionError):
            registry._importDatasets([ref2])

        # Ingesting different dataId with the same dataset ID should work
        ref3 = DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

        ref3 = DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be included
        # when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names,
        )
        # Use a pattern that can match either parent or components. Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
        )
        # This pattern matches only a component. In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
        )
        # Add a dataset type using a StorageClass that we'll then remove; check
        # that this does not affect our ability to query for dataset types
        # (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")},
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that. So if the next line
        # fails (i.e. "temporary.data" _is_ in everything.names), it means
        # this part of the test isn't doing anything, because the _unregister
        # call above isn't simulating the real-life case we want it to
        # simulate, in which different versions of daf_butler in entirely
        # different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp". This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(
            registry.queryDatasets(
                "bias.wcs",
                collections=collection,
            )
        )
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2}, {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
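        # Lookups in chain2 now search run2 first, then recurse into chain1,
        # which is [tag1, run2].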

        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2. It should also be found via
        # chain2, which reaches run2 both directly and through chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions),
            )
            self.assertEqual(
                packer2.unpack(packer2.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer2.dimensions),
            )
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, this is not in the dimensions, but it
        # is a part of the full expression so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test, we want
        # "band" in the test so also have to add physical_filter
        # dimensions
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8")))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
1171 self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5)) 

1172 self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7)) 

1173 self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",)) 

1174 

1175 # Specifying non-existing skymap is an exception 

1176 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

1177 rows = registry.queryDataIds( 

1178 dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'" 

1179 ).toSet() 

1180 

1181 def testSpatialJoin(self): 

1182 """Test queries that involve spatial overlap joins.""" 

1183 registry = self.makeRegistry() 

1184 self.loadData(registry, "hsc-rc2-subset.yaml") 

1185 

1186 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of 

1187 # the TopologicalFamily they belong to. We'll relate all elements in 

1188 # each family to all of the elements in each other family. 

1189 families = defaultdict(set) 

1190 # Dictionary of {element.name: {dataId: region}}. 

1191 regions = {} 

1192 for element in registry.dimensions.getDatabaseElements(): 

1193 if element.spatial is not None: 

1194 families[element.spatial.name].add(element) 

1195 regions[element.name] = { 

1196 record.dataId: record.region for record in registry.queryDimensionRecords(element) 

1197 } 

1198 

1199 # If this check fails, it's not necessarily a problem - it may just be 

1200 # a reasonable change to the default dimension definitions - but the 

1201 # test below depends on there being more than one family to do anything 

1202 # useful. 

1203 self.assertEqual(len(families), 2) 

1204 

1205 # Overlap DatabaseDimensionElements with each other. 

1206 for family1, family2 in itertools.combinations(families, 2): 

1207 for element1, element2 in itertools.product(families[family1], families[family2]): 

1208 graph = DimensionGraph.union(element1.graph, element2.graph) 

1209 # Construct expected set of overlapping data IDs via a 

1210 # brute-force comparison of the regions we've already fetched. 

1211 expected = { 

1212 DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph) 

1213 for (dataId1, region1), (dataId2, region2) in itertools.product( 

1214 regions[element1.name].items(), regions[element2.name].items() 

1215 ) 

1216 if not region1.isDisjointFrom(region2) 

1217 } 

1218 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1219 queried = set(registry.queryDataIds(graph)) 

1220 self.assertEqual(expected, queried) 

1221 

1222 # Overlap each DatabaseDimensionElement with the commonSkyPix system. 

1223 commonSkyPix = registry.dimensions.commonSkyPix 

1224 for elementName, elementRegions in regions.items():

1225 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)

1226 expected = set()

1227 for dataId, region in elementRegions.items():

1228 for begin, end in commonSkyPix.pixelization.envelope(region): 

1229 expected.update( 

1230 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph) 

1231 for index in range(begin, end) 

1232 ) 

1233 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1234 queried = set(registry.queryDataIds(graph)) 

1235 self.assertEqual(expected, queried) 

1236 

1237 def testAbstractQuery(self): 

1238 """Test that we can run a query that just lists the known 

1239 bands. This is tricky because band is 

1240 backed by a query against physical_filter. 

1241 """ 

1242 registry = self.makeRegistry() 

1243 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1244 registry.insertDimensionData( 

1245 "physical_filter", 

1246 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1247 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1248 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1249 ) 

1250 rows = registry.queryDataIds(["band"]).toSet() 

1251 self.assertCountEqual( 

1252 rows, 

1253 [ 

1254 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1255 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1256 ], 

1257 ) 

1258 

1259 def testAttributeManager(self): 

1260 """Test basic functionality of attribute manager.""" 

1261 # number of attributes with schema versions in a fresh database, 

1262 # 6 managers with 3 records per manager, plus config for dimensions 

1263 VERSION_COUNT = 6 * 3 + 1 
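# (That is, 19 attribute rows in total are expected in a fresh database.)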

1264 

1265 registry = self.makeRegistry() 

1266 attributes = registry._managers.attributes 

1267 

1268 # check what get() returns for non-existing key 

1269 self.assertIsNone(attributes.get("attr")) 

1270 self.assertEqual(attributes.get("attr", ""), "") 

1271 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1272 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1273 

1274 # cannot store empty key or value 

1275 with self.assertRaises(ValueError): 

1276 attributes.set("", "value") 

1277 with self.assertRaises(ValueError): 

1278 attributes.set("attr", "") 

1279 

1280 # set value of non-existing key 

1281 attributes.set("attr", "value") 

1282 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1283 self.assertEqual(attributes.get("attr"), "value") 

1284 

1285 # update value of an existing key (requires force=True)

1286 with self.assertRaises(ButlerAttributeExistsError): 

1287 attributes.set("attr", "value2") 

1288 

1289 attributes.set("attr", "value2", force=True) 

1290 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1291 self.assertEqual(attributes.get("attr"), "value2") 

1292 

1293 # delete existing key 

1294 self.assertTrue(attributes.delete("attr")) 

1295 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1296 

1297 # delete non-existing key 

1298 self.assertFalse(attributes.delete("non-attr")) 

1299 

1300 # store a bunch of keys and get the list back

1301 data = [ 

1302 ("version.core", "1.2.3"), 

1303 ("version.dimensions", "3.2.1"), 

1304 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1305 ] 

1306 for key, value in data: 

1307 attributes.set(key, value) 

1308 items = dict(attributes.items()) 

1309 for key, value in data: 

1310 self.assertEqual(items[key], value) 
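# Editorial sketch (an addition, not original test code): the set()
# semantics exercised above can be modelled over a plain dict. This is a
# hypothetical illustration of the observed behaviour, not the real
# attribute-manager implementation.
@staticmethod
def _attributeSetSketch(store: dict, key: str, value: str, force: bool = False) -> None:
    """Model of attributes.set(): reject empty keys or values, and require
    force=True to overwrite an existing key.
    """
    if not key or not value:
        raise ValueError("empty key or value is not allowed")
    if key in store and not force:
        raise ButlerAttributeExistsError(f"attribute {key!r} already exists")
    store[key] = value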

1311 

1312 def testQueryDatasetsDeduplication(self): 

1313 """Test that the findFirst option to queryDatasets selects datasets 

1314 from collections in the order given.

1315 """ 

1316 registry = self.makeRegistry() 

1317 self.loadData(registry, "base.yaml") 

1318 self.loadData(registry, "datasets.yaml") 

1319 self.assertCountEqual( 

1320 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1321 [ 

1322 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1323 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1324 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1325 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1326 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1327 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1328 ], 

1329 ) 

1330 self.assertCountEqual( 

1331 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1332 [ 

1333 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1334 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1335 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1336 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1337 ], 

1338 ) 

1339 self.assertCountEqual( 

1340 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1341 [ 

1342 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1343 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1344 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1345 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1346 ], 

1347 ) 
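# Editorial sketch (an addition; an illustrative model, not the Registry
# implementation): the findFirst rule demonstrated above amounts to scanning
# collections in the order given and keeping the first dataset seen for each
# data ID.
@staticmethod
def _findFirstSketch(datasets_by_collection: dict, collection_order: list) -> list:
    """``datasets_by_collection`` is a hypothetical mapping of
    {collection: {data_id: ref}}.
    """
    seen = {}
    for collection in collection_order:
        for data_id, ref in datasets_by_collection.get(collection, {}).items():
            # setdefault keeps the first ref encountered for each data ID.
            seen.setdefault(data_id, ref)
    return list(seen.values())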

1348 

1349 def testQueryResults(self): 

1350 """Test querying for data IDs and then manipulating the QueryResults 

1351 object returned to perform other queries. 

1352 """ 

1353 registry = self.makeRegistry() 

1354 self.loadData(registry, "base.yaml") 

1355 self.loadData(registry, "datasets.yaml") 

1356 bias = registry.getDatasetType("bias") 

1357 flat = registry.getDatasetType("flat") 

1358 # Obtain expected results from methods other than those we're testing 

1359 # here. That includes: 

1360 # - the dimensions of the data IDs we want to query: 

1361 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1362 # - the dimensions of some other data IDs we'll extract from that: 

1363 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1364 # - the data IDs we expect to obtain from the first queries: 

1365 expectedDataIds = DataCoordinateSet( 

1366 { 

1367 DataCoordinate.standardize( 

1368 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1369 ) 

1370 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1371 }, 

1372 graph=expectedGraph, 

1373 hasFull=False, 

1374 hasRecords=False, 

1375 ) 

1376 # - the flat datasets we expect to find from those data IDs, in just 

1377 # one collection (so deduplication is irrelevant): 

1378 expectedFlats = [ 

1379 registry.findDataset( 

1380 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1381 ), 

1382 registry.findDataset( 

1383 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1384 ), 

1385 registry.findDataset( 

1386 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1387 ), 

1388 ] 

1389 # - the data IDs we expect to extract from that: 

1390 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1391 # - the bias datasets we expect to find from those data IDs, after we 

1392 # subset out the physical_filter dimension, first with duplicates:

1393 expectedAllBiases = [ 

1394 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1395 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1396 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1397 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1398 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1399 ] 

1400 # - ...and without duplicates: 

1401 expectedDeduplicatedBiases = [ 

1402 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1403 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1404 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1405 ] 

1406 # Test against those expected results, using a "lazy" query for the 

1407 # data IDs (which re-executes that query each time we use it to do 

1408 # something new). 

1409 dataIds = registry.queryDataIds( 

1410 ["detector", "physical_filter"], 

1411 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1412 instrument="Cam1", 

1413 ) 

1414 self.assertEqual(dataIds.graph, expectedGraph) 

1415 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1416 self.assertCountEqual( 

1417 list( 

1418 dataIds.findDatasets( 

1419 flat, 

1420 collections=["imported_r"], 

1421 ) 

1422 ), 

1423 expectedFlats, 

1424 ) 

1425 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1426 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1427 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1428 self.assertCountEqual( 

1429 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1430 expectedAllBiases, 

1431 ) 

1432 self.assertCountEqual( 

1433 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1434 expectedDeduplicatedBiases, 

1435 ) 

1436 # Materialize the bias dataset queries (only) by putting the results 

1437 # into temporary tables, then repeat those tests. 

1438 with subsetDataIds.findDatasets( 

1439 bias, collections=["imported_r", "imported_g"], findFirst=False 

1440 ).materialize() as biases: 

1441 self.assertCountEqual(list(biases), expectedAllBiases) 

1442 with subsetDataIds.findDatasets( 

1443 bias, collections=["imported_r", "imported_g"], findFirst=True 

1444 ).materialize() as biases: 

1445 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1446 # Materialize the data ID subset query, but not the dataset queries. 

1447 with subsetDataIds.materialize() as subsetDataIds: 

1448 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1449 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1450 self.assertCountEqual( 

1451 list( 

1452 subsetDataIds.findDatasets( 

1453 bias, collections=["imported_r", "imported_g"], findFirst=False 

1454 ) 

1455 ), 

1456 expectedAllBiases, 

1457 ) 

1458 self.assertCountEqual( 

1459 list( 

1460 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1461 ), 

1462 expectedDeduplicatedBiases, 

1463 ) 

1464 # Materialize the dataset queries, too. 

1465 with subsetDataIds.findDatasets( 

1466 bias, collections=["imported_r", "imported_g"], findFirst=False 

1467 ).materialize() as biases: 

1468 self.assertCountEqual(list(biases), expectedAllBiases) 

1469 with subsetDataIds.findDatasets( 

1470 bias, collections=["imported_r", "imported_g"], findFirst=True 

1471 ).materialize() as biases: 

1472 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1473 # Materialize the original query, but none of the follow-up queries. 

1474 with dataIds.materialize() as dataIds: 

1475 self.assertEqual(dataIds.graph, expectedGraph) 

1476 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1477 self.assertCountEqual( 

1478 list( 

1479 dataIds.findDatasets( 

1480 flat, 

1481 collections=["imported_r"], 

1482 ) 

1483 ), 

1484 expectedFlats, 

1485 ) 

1486 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1487 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1488 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1489 self.assertCountEqual( 

1490 list( 

1491 subsetDataIds.findDatasets( 

1492 bias, collections=["imported_r", "imported_g"], findFirst=False 

1493 ) 

1494 ), 

1495 expectedAllBiases, 

1496 ) 

1497 self.assertCountEqual( 

1498 list( 

1499 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1500 ), 

1501 expectedDeduplicatedBiases, 

1502 ) 

1503 # Materialize just the bias dataset queries. 

1504 with subsetDataIds.findDatasets( 

1505 bias, collections=["imported_r", "imported_g"], findFirst=False 

1506 ).materialize() as biases: 

1507 self.assertCountEqual(list(biases), expectedAllBiases) 

1508 with subsetDataIds.findDatasets( 

1509 bias, collections=["imported_r", "imported_g"], findFirst=True 

1510 ).materialize() as biases: 

1511 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1512 # Materialize the subset data ID query, but not the dataset 

1513 # queries. 

1514 with subsetDataIds.materialize() as subsetDataIds: 

1515 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1516 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1517 self.assertCountEqual( 

1518 list( 

1519 subsetDataIds.findDatasets( 

1520 bias, collections=["imported_r", "imported_g"], findFirst=False 

1521 ) 

1522 ), 

1523 expectedAllBiases, 

1524 ) 

1525 self.assertCountEqual( 

1526 list( 

1527 subsetDataIds.findDatasets( 

1528 bias, collections=["imported_r", "imported_g"], findFirst=True 

1529 ) 

1530 ), 

1531 expectedDeduplicatedBiases, 

1532 ) 

1533 # Materialize the bias dataset queries, too, so now we're 

1534 # materializing every single step. 

1535 with subsetDataIds.findDatasets( 

1536 bias, collections=["imported_r", "imported_g"], findFirst=False 

1537 ).materialize() as biases: 

1538 self.assertCountEqual(list(biases), expectedAllBiases) 

1539 with subsetDataIds.findDatasets( 

1540 bias, collections=["imported_r", "imported_g"], findFirst=True 

1541 ).materialize() as biases: 

1542 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 
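# Editorial note (an addition, not original test code): the pattern exercised
# throughout this test is that any query-results object can be materialized
# into a temporary table via a context manager while keeping the same API:
#
#     with results.materialize() as materialized:
#         ...  # same methods (toSet, findDatasets, subset, ...) as the lazy
#         ...  # results, now backed by a temporary table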

1543 

1544 def testEmptyDimensionsQueries(self): 

1545 """Test Query and QueryResults objects in the case where there are no 

1546 dimensions. 

1547 """ 

1548 # Set up test data: one dataset type, two runs, one dataset in each. 

1549 registry = self.makeRegistry() 

1550 self.loadData(registry, "base.yaml") 

1551 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1552 registry.registerDatasetType(schema) 

1553 dataId = DataCoordinate.makeEmpty(registry.dimensions) 

1554 run1 = "run1" 

1555 run2 = "run2" 

1556 registry.registerRun(run1) 

1557 registry.registerRun(run2) 

1558 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1559 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1560 # Query directly for both of the datasets, and then for each one at a time.

1561 self.checkQueryResults( 

1562 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1563 ) 

1564 self.checkQueryResults( 

1565 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1566 [dataset1], 

1567 ) 

1568 self.checkQueryResults( 

1569 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1570 [dataset2], 

1571 ) 

1572 # Query for data IDs with no dimensions. 

1573 dataIds = registry.queryDataIds([]) 

1574 self.checkQueryResults(dataIds, [dataId]) 

1575 # Use queried data IDs to find the datasets. 

1576 self.checkQueryResults( 

1577 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1578 [dataset1, dataset2], 

1579 ) 

1580 self.checkQueryResults( 

1581 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1582 [dataset1], 

1583 ) 

1584 self.checkQueryResults( 

1585 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1586 [dataset2], 

1587 ) 

1588 # Now materialize the data ID query results and repeat those tests. 

1589 with dataIds.materialize() as dataIds: 

1590 self.checkQueryResults(dataIds, [dataId]) 

1591 self.checkQueryResults( 

1592 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1593 [dataset1], 

1594 ) 

1595 self.checkQueryResults( 

1596 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1597 [dataset2], 

1598 ) 

1599 # Query for non-empty data IDs, then subset that to get the empty one. 

1600 # Repeat the above tests starting from that. 

1601 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1602 self.checkQueryResults(dataIds, [dataId]) 

1603 self.checkQueryResults( 

1604 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1605 [dataset1, dataset2], 

1606 ) 

1607 self.checkQueryResults( 

1608 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1609 [dataset1], 

1610 ) 

1611 self.checkQueryResults( 

1612 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1613 [dataset2], 

1614 ) 

1615 with dataIds.materialize() as dataIds: 

1616 self.checkQueryResults(dataIds, [dataId]) 

1617 self.checkQueryResults( 

1618 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1619 [dataset1, dataset2], 

1620 ) 

1621 self.checkQueryResults( 

1622 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1623 [dataset1], 

1624 ) 

1625 self.checkQueryResults( 

1626 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1627 [dataset2], 

1628 ) 

1629 # Query for non-empty data IDs, then materialize, then subset to get 

1630 # the empty one. Repeat again. 

1631 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1632 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1633 self.checkQueryResults(dataIds, [dataId]) 

1634 self.checkQueryResults( 

1635 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1636 [dataset1, dataset2], 

1637 ) 

1638 self.checkQueryResults( 

1639 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1640 [dataset1], 

1641 ) 

1642 self.checkQueryResults( 

1643 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1644 [dataset2], 

1645 ) 

1646 with dataIds.materialize() as dataIds: 

1647 self.checkQueryResults(dataIds, [dataId]) 

1648 self.checkQueryResults( 

1649 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1650 [dataset1, dataset2], 

1651 ) 

1652 self.checkQueryResults( 

1653 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1654 [dataset1], 

1655 ) 

1656 self.checkQueryResults( 

1657 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1658 [dataset2], 

1659 ) 

1660 # Query for non-empty data IDs with a constraint on an empty-data-ID 

1661 # dataset that exists. 

1662 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...) 

1663 self.checkQueryResults( 

1664 dataIds.subset(unique=True), 

1665 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)], 

1666 ) 

1667 # Again query for non-empty data IDs with a constraint on empty-data-ID 

1668 # datasets, but when the datasets don't exist. We delete the existing 

1669 # dataset and query just that collection rather than creating a new 

1670 # empty collection because this is a bit less likely for our build-time 

1671 # logic to shortcut-out (via the collection summaries), and such a 

1672 # shortcut would make this test a bit more trivial than we'd like. 

1673 registry.removeDatasets([dataset2]) 

1674 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2) 

1675 self.checkQueryResults(dataIds, []) 

1676 

1677 def testDimensionDataModifications(self): 

1678 """Test that modifying dimension records via: 

1679 syncDimensionData(..., update=True) and 

1680 insertDimensionData(..., replace=True) works as expected, even in the 

1681 presence of datasets using those dimensions and spatial overlap 

1682 relationships. 

1683 """ 

1684 

1685 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1686 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1687 for begin, end in ranges: 

1688 yield from range(begin, end) 

1689 

1690 def range_set_hull( 

1691 ranges: lsst.sphgeom.RangeSet, 

1692 pixelization: lsst.sphgeom.HtmPixelization, 

1693 ) -> lsst.sphgeom.ConvexPolygon: 

1694 """Create a ConvexPolygon hull of the region defined by a set of 

1695 HTM pixelization index ranges. 

1696 """ 

1697 points = [] 

1698 for index in unpack_range_set(ranges): 

1699 points.extend(pixelization.triangle(index).getVertices()) 

1700 return lsst.sphgeom.ConvexPolygon(points) 
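# Editorial worked example (an addition; assumes lsst.sphgeom.RangeSet
# iterates as (begin, end) pairs, as the helpers above rely on):
#
#     list(unpack_range_set(lsst.sphgeom.RangeSet(4, 8)))  ->  [4, 5, 6, 7]
#
# Similarly, RangeSet(index).scaled(4) used below maps one HTM trixel's
# index to the half-open index range of its four children at the next
# level, because the children of trixel ``i`` are ``4*i .. 4*i + 3``.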

1701 

1702 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1703 # and four child regions (the trixels within the parent at the next

1704 # level). We'll use the parent as a tract/visit region and the children

1705 # as its patch/visit_detector regions. 

1706 registry = self.makeRegistry() 

1707 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1708 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1709 index = 12288 

1710 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 

1711 assert htm6.universe().contains(child_ranges_small) 

1712 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)] 

1713 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1714 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1715 ) 

1716 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1717 # Make a larger version of each child region, defined to be the set of 

1718 # htm6 trixels that overlap the original's bounding circle. Make a new 

1719 # parent that's the convex hull of the new children. 

1720 child_regions_large = [ 

1721 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1722 ] 

1723 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small)) 

1724 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1725 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1726 ) 

1727 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1728 assert parent_region_large.contains(parent_region_small) 

1729 assert not parent_region_small.contains(parent_region_large) 

1730 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1731 # Find some commonSkyPix indices that overlap the large regions but not 

1732 # overlap the small regions. We use commonSkyPix here to make sure the 

1733 # real tests later involve what's in the database, not just post-query 

1734 # region filtering. 

1735 child_difference_indices = [] 

1736 for large, small in zip(child_regions_large, child_regions_small): 

1737 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1738 assert difference, "if this is empty, we can't test anything useful with these regions" 

1739 assert all( 

1740 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1741 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1742 for d in difference 

1743 ) 

1744 child_difference_indices.append(difference) 

1745 parent_difference_indices = list( 

1746 unpack_range_set( 

1747 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1748 ) 

1749 ) 

1750 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1751 assert all( 

1752 ( 

1753 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1754 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1755 ) 

1756 for d in parent_difference_indices 

1757 ) 

1758 # Now that we've finally got those regions, we'll insert the large ones 

1759 # as tract/patch dimension records. 

1760 skymap_name = "testing_v1" 

1761 registry.insertDimensionData( 

1762 "skymap", 

1763 { 

1764 "name": skymap_name, 

1765 "hash": bytes([42]), 

1766 "tract_max": 1, 

1767 "patch_nx_max": 2, 

1768 "patch_ny_max": 2, 

1769 }, 

1770 ) 

1771 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

1772 registry.insertDimensionData( 

1773 "patch", 

1774 *[ 

1775 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1776 for n, c in enumerate(child_regions_large) 

1777 ], 

1778 ) 

1779 # Add a dataset that uses these dimensions to make sure that modifying

1780 # them doesn't disrupt foreign keys (we need to make sure the DB doesn't

1781 # implement insert with replace=True as delete-then-insert).

1782 dataset_type = DatasetType( 

1783 "coadd", 

1784 dimensions=["tract", "patch"], 

1785 universe=registry.dimensions, 

1786 storageClass="Exposure", 

1787 ) 

1788 registry.registerDatasetType(dataset_type) 

1789 registry.registerCollection("the_run", CollectionType.RUN) 

1790 registry.insertDatasets( 

1791 dataset_type, 

1792 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

1793 run="the_run", 

1794 ) 

1795 # Query for tracts and patches that overlap some "difference"

1796 # commonSkyPix pixels; there should be overlaps, because the database has

1797 # the "large" suite of regions. 

1798 self.assertEqual( 

1799 {0}, 

1800 { 

1801 data_id["tract"] 

1802 for data_id in registry.queryDataIds( 

1803 ["tract"], 

1804 skymap=skymap_name, 

1805 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1806 ) 

1807 }, 

1808 ) 

1809 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1810 self.assertIn( 

1811 patch_id, 

1812 { 

1813 data_id["patch"] 

1814 for data_id in registry.queryDataIds( 

1815 ["patch"], 

1816 skymap=skymap_name, 

1817 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1818 ) 

1819 }, 

1820 ) 

1821 # Use sync to update the tract region and insert to update the patch 

1822 # regions, to the "small" suite. 

1823 updated = registry.syncDimensionData( 

1824 "tract", 

1825 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

1826 update=True, 

1827 ) 

1828 self.assertEqual(updated, {"region": parent_region_large}) 
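# (As the assertion above shows, syncDimensionData(..., update=True) returns
# a mapping from each field it changed to that field's *old* value.)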

1829 registry.insertDimensionData( 

1830 "patch", 

1831 *[ 

1832 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1833 for n, c in enumerate(child_regions_small) 

1834 ], 

1835 replace=True, 

1836 ) 

1837 # Query again; there now should be no such overlaps, because the 

1838 # database has the "small" suite of regions. 

1839 self.assertFalse( 

1840 set( 

1841 registry.queryDataIds( 

1842 ["tract"], 

1843 skymap=skymap_name, 

1844 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1845 ) 

1846 ) 

1847 ) 

1848 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1849 self.assertNotIn( 

1850 patch_id, 

1851 { 

1852 data_id["patch"] 

1853 for data_id in registry.queryDataIds( 

1854 ["patch"], 

1855 skymap=skymap_name, 

1856 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1857 ) 

1858 }, 

1859 ) 

1860 # Update back to the large regions and query one more time. 

1861 updated = registry.syncDimensionData( 

1862 "tract", 

1863 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

1864 update=True, 

1865 ) 

1866 self.assertEqual(updated, {"region": parent_region_small}) 

1867 registry.insertDimensionData( 

1868 "patch", 

1869 *[ 

1870 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1871 for n, c in enumerate(child_regions_large) 

1872 ], 

1873 replace=True, 

1874 ) 

1875 self.assertEqual( 

1876 {0}, 

1877 { 

1878 data_id["tract"] 

1879 for data_id in registry.queryDataIds( 

1880 ["tract"], 

1881 skymap=skymap_name, 

1882 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1883 ) 

1884 }, 

1885 ) 

1886 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1887 self.assertIn( 

1888 patch_id, 

1889 { 

1890 data_id["patch"] 

1891 for data_id in registry.queryDataIds( 

1892 ["patch"], 

1893 skymap=skymap_name, 

1894 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1895 ) 

1896 }, 

1897 ) 

1898 

1899 def testCalibrationCollections(self): 

1900 """Test operations on `~CollectionType.CALIBRATION` collections, 

1901 including `Registry.certify`, `Registry.decertify`, and 

1902 `Registry.findDataset`. 

1903 """ 

1904 # Setup - make a Registry, fill it with some datasets in 

1905 # non-calibration collections. 

1906 registry = self.makeRegistry() 

1907 self.loadData(registry, "base.yaml") 

1908 self.loadData(registry, "datasets.yaml") 

1909 # Set up some timestamps. 

1910 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

1911 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

1912 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

1913 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

1914 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

1915 allTimespans = [ 

1916 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

1917 ] 
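# (None appears twice in the input sequence so that combinations() also
# yields (None, None), the fully unbounded timespan, alongside the
# half-bounded (None, t) and (t, None) cases.)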

1918 # Get references to some datasets. 

1919 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

1920 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

1921 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

1922 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

1923 # Register the main calibration collection we'll be working with. 

1924 collection = "Cam1/calibs/default" 

1925 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

1926 # Cannot associate into a calibration collection (no timespan). 

1927 with self.assertRaises(CollectionTypeError): 

1928 registry.associate(collection, [bias2a]) 

1929 # Certify 2a dataset with [t2, t4) validity. 

1930 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

1931 # Test that we can query for this dataset via the new collection, both 

1932 # on its own and with a RUN collection, as long as we don't try to join 

1933 # in temporal dimensions or use findFirst=True. 

1934 self.assertEqual( 

1935 set(registry.queryDatasets("bias", findFirst=False, collections=collection)), 

1936 {bias2a}, 

1937 ) 

1938 self.assertEqual( 

1939 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])), 

1940 { 

1941 bias2a, 

1942 bias2b, 

1943 bias3b, 

1944 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1945 }, 

1946 ) 

1947 self.assertEqual( 

1948 set(registry.queryDataIds("detector", datasets="bias", collections=collection)), 

1949 {registry.expandDataId(instrument="Cam1", detector=2)}, 

1950 ) 

1951 self.assertEqual( 

1952 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])), 

1953 { 

1954 registry.expandDataId(instrument="Cam1", detector=2), 

1955 registry.expandDataId(instrument="Cam1", detector=3), 

1956 registry.expandDataId(instrument="Cam1", detector=4), 

1957 }, 

1958 ) 

1959 

1960 # We should not be able to certify 2b with anything overlapping that 

1961 # window. 

1962 with self.assertRaises(ConflictingDefinitionError): 

1963 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

1964 with self.assertRaises(ConflictingDefinitionError): 

1965 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

1966 with self.assertRaises(ConflictingDefinitionError): 

1967 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

1968 with self.assertRaises(ConflictingDefinitionError): 

1969 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

1970 with self.assertRaises(ConflictingDefinitionError): 

1971 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

1972 with self.assertRaises(ConflictingDefinitionError): 

1973 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

1974 with self.assertRaises(ConflictingDefinitionError): 

1975 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

1976 with self.assertRaises(ConflictingDefinitionError): 

1977 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

1978 # We should be able to certify 3a with a range overlapping that window, 

1979 # because it's for a different detector. 

1980 # We'll certify 3a over [t1, t3). 

1981 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

1982 # Now we'll certify 2b and 3b together over [t4, ∞). 

1983 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

1984 

1985 # Fetch all associations and check that they are what we expect. 

1986 self.assertCountEqual( 

1987 list( 

1988 registry.queryDatasetAssociations( 

1989 "bias", 

1990 collections=[collection, "imported_g", "imported_r"], 

1991 ) 

1992 ), 

1993 [ 

1994 DatasetAssociation( 

1995 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1996 collection="imported_g", 

1997 timespan=None, 

1998 ), 

1999 DatasetAssociation( 

2000 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2001 collection="imported_r", 

2002 timespan=None, 

2003 ), 

2004 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

2005 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

2006 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

2007 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

2008 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

2009 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

2010 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2011 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2012 ], 

2013 ) 

2014 

2015 class Ambiguous: 

2016 """Tag class to denote lookups that should be ambiguous.""" 

2017 

2018 pass 

2019 

2020 def assertLookup( 

2021 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]] 

2022 ) -> None: 

2023 """Local function that asserts that a bias lookup returns the given 

2024 expected result. 

2025 """ 

2026 if expected is Ambiguous: 

2027 with self.assertRaises(RuntimeError): 

2028 registry.findDataset( 

2029 "bias", 

2030 collections=collection, 

2031 instrument="Cam1", 

2032 detector=detector, 

2033 timespan=timespan, 

2034 ) 

2035 else: 

2036 self.assertEqual( 

2037 expected, 

2038 registry.findDataset( 

2039 "bias", 

2040 collections=collection, 

2041 instrument="Cam1", 

2042 detector=detector, 

2043 timespan=timespan, 

2044 ), 

2045 ) 

2046 

2047 # Systematically test lookups against expected results. 

2048 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2049 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2050 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2051 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2052 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

2053 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2054 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2055 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2056 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2057 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

2058 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2059 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2060 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2061 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

2062 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2063 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

2064 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

2065 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

2066 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

2067 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2068 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2069 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2070 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2071 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2072 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2073 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

2074 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2075 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2076 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2077 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2078 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

2079 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2080 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2081 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2082 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2083 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2084 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2085 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2086 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2087 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2088 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2089 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2090 

2091 # Decertify [t3, t5) for all data IDs, and do test lookups again. 

2092 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2093 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2094 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2095 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2096 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2097 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2098 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2099 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2100 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2101 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2102 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2103 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2104 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2105 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2106 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2107 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2108 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2109 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2110 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2111 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2112 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2113 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2114 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2115 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2116 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2117 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2118 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2119 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2120 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2121 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2122 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2123 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2124 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2125 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2126 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2127 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2128 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2129 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2130 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2131 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2132 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2133 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2134 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2135 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2136 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2137 

2138 # Decertify everything, this time with explicit data IDs, then check 

2139 # that no lookups succeed. 

2140 registry.decertify( 

2141 collection, 

2142 "bias", 

2143 Timespan(None, None), 

2144 dataIds=[ 

2145 dict(instrument="Cam1", detector=2), 

2146 dict(instrument="Cam1", detector=3), 

2147 ], 

2148 ) 

2149 for detector in (2, 3): 

2150 for timespan in allTimespans: 

2151 assertLookup(detector=detector, timespan=timespan, expected=None) 

2152 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2153 # those. 

2154 registry.certify( 

2155 collection, 

2156 [bias2a, bias3a], 

2157 Timespan(None, None), 

2158 ) 

2159 for timespan in allTimespans: 

2160 assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2161 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2162 # Decertify just bias2 over [t2, t4). 

2163 # This should split a single certification row into two (and leave the 

2164 # other existing row, for bias3a, alone). 

2165 registry.decertify( 

2166 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2167 ) 

2168 for timespan in allTimespans: 

2169 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2170 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2171 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2172 if overlapsBefore and overlapsAfter: 

2173 expected = Ambiguous 

2174 elif overlapsBefore or overlapsAfter: 

2175 expected = bias2a 

2176 else: 

2177 expected = None 

2178 assertLookup(detector=2, timespan=timespan, expected=expected) 
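# Editorial sketch (an addition; an illustrative model, not the Registry
# implementation): the findDataset behaviour checked by assertLookup above
# can be modelled as "collect the certified refs whose validity range
# overlaps the query timespan"; no match is a miss, and more than one match
# is ambiguous.
@staticmethod
def _calibLookupSketch(certifications: list, query_timespan: Timespan):
    """``certifications`` is a hypothetical list of (ref, validity) pairs
    for a single dataset type and data ID.
    """
    matches = [ref for ref, validity in certifications if validity.overlaps(query_timespan)]
    if not matches:
        return None
    if len(matches) > 1:
        raise RuntimeError("ambiguous calibration lookup")
    return matches[0]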

2179 

2180 def testSkipCalibs(self): 

2181 """Test how queries handle skipping of calibration collections.""" 

2182 registry = self.makeRegistry() 

2183 self.loadData(registry, "base.yaml") 

2184 self.loadData(registry, "datasets.yaml") 

2185 

2186 coll_calib = "Cam1/calibs/default" 

2187 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2188 

2189 # Add all biases to the calibration collection. 

2190 # Without this, the logic that prunes dataset subqueries based on 

2191 # datasetType-collection summary information will fire before the logic 

2192 # we want to test below. This is a good thing (it avoids the dreaded 

2193 # NotImplementedError a bit more often) everywhere but here. 

2194 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2195 

2196 coll_list = [coll_calib, "imported_g", "imported_r"] 

2197 chain = "Cam1/chain" 

2198 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2199 registry.setCollectionChain(chain, coll_list) 

2200 

2201 # explicit list will raise if findFirst=True or there are temporal 

2202 # dimensions 

2203 with self.assertRaises(NotImplementedError): 

2204 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2205 with self.assertRaises(NotImplementedError): 

2206 registry.queryDataIds( 

2207 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2208 ).count() 

2209 

2210 # chain will skip 

2211 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2212 self.assertGreater(len(datasets), 0) 

2213 

2214 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2215 self.assertGreater(len(dataIds), 0) 

2216 

2217 # glob will skip too 

2218 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2219 self.assertGreater(len(datasets), 0) 

2220 

2221 # regular expression will skip too 

2222 pattern = re.compile(".*") 

2223 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2224 self.assertGreater(len(datasets), 0) 

2225 

2226 # ellipsis should work as usual 

2227 datasets = list(registry.queryDatasets("bias", collections=...)) 

2228 self.assertGreater(len(datasets), 0) 

2229 

2230 # a few tests with findFirst

2231 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2232 self.assertGreater(len(datasets), 0) 

2233 

2234 def testIngestTimeQuery(self): 
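"""Test query expressions that filter datasets on their ingest_date."""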

2235 

2236 registry = self.makeRegistry() 

2237 self.loadData(registry, "base.yaml") 

2238 dt0 = datetime.utcnow() 

2239 self.loadData(registry, "datasets.yaml") 

2240 dt1 = datetime.utcnow() 

2241 

2242 datasets = list(registry.queryDatasets(..., collections=...)) 

2243 len0 = len(datasets) 

2244 self.assertGreater(len0, 0) 

2245 

2246 where = "ingest_date > T'2000-01-01'" 
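# (T'...' is the query expression language's literal syntax for a time.)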

2247 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2248 len1 = len(datasets) 

2249 self.assertEqual(len0, len1) 

2250 

2251 # no one will ever use this piece of software in 30 years 

2252 where = "ingest_date > T'2050-01-01'" 

2253 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2254 len2 = len(datasets) 

2255 self.assertEqual(len2, 0) 

2256 

2257 # Check more exact timing to make sure there is no 37-second offset

2258 # (after fixing DM-30124). SQLite time precision is 1 second, so make

2259 # sure that we don't test with higher precision.

2260 tests = [ 

2261 # format: (timestamp, operator, expected_len) 

2262 (dt0 - timedelta(seconds=1), ">", len0), 

2263 (dt0 - timedelta(seconds=1), "<", 0), 

2264 (dt1 + timedelta(seconds=1), "<", len0), 

2265 (dt1 + timedelta(seconds=1), ">", 0), 

2266 ] 

2267 for dt, op, expect_len in tests: 

2268 dt_str = dt.isoformat(sep=" ") 

2269 

2270 where = f"ingest_date {op} T'{dt_str}'" 

2271 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2272 self.assertEqual(len(datasets), expect_len) 

2273 

2274 # same with bind using datetime or astropy Time 

2275 where = f"ingest_date {op} ingest_time" 

2276 datasets = list( 

2277 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2278 ) 

2279 self.assertEqual(len(datasets), expect_len) 

2280 

2281 dt_astropy = astropy.time.Time(dt, format="datetime") 

2282 datasets = list( 

2283 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2284 ) 

2285 self.assertEqual(len(datasets), expect_len) 

2286 

2287 def testTimespanQueries(self): 

2288 """Test query expressions involving timespans.""" 

2289 registry = self.makeRegistry() 

2290 self.loadData(registry, "hsc-rc2-subset.yaml") 

2291 # All visits in the database; mapping from visit ID to timespan.

2292 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2293 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2294 # visit IDs are monotonically increasing).

2295 ids = sorted(visits.keys()) 

2296 self.assertGreater(len(ids), 20) 

2297 # Pick some quasi-random indexes into `ids` to play with. 

2298 i1 = int(len(ids) * 0.1) 

2299 i2 = int(len(ids) * 0.3) 

2300 i3 = int(len(ids) * 0.6) 

2301 i4 = int(len(ids) * 0.8) 

2302 # Extract some times from those: just before the beginning of i1 (which 

2303 # should be after the end of the previous visit), exactly the

2304 # beginning of i2, just after the beginning of i3 (and before its end), 

2305 # and the exact end of i4. 

2306 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2307 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2308 t2 = visits[ids[i2]].begin 

2309 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2310 self.assertLess(t3, visits[ids[i3]].end) 

2311 t4 = visits[ids[i4]].end 

2312 # Make sure those are actually in order. 

2313 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2314 

2315 bind = { 

2316 "t1": t1, 

2317 "t2": t2, 

2318 "t3": t3, 

2319 "t4": t4, 

2320 "ts23": Timespan(t2, t3), 

2321 } 

2322 

2323 def query(where): 

2324 """Helper function that queries for visit data IDs and returns 

2325 results as a sorted, deduplicated list of visit IDs. 

2326 """ 

2327 return sorted( 

2328 { 

2329 dataId["visit"] 

2330 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2331 } 

2332 ) 

2333 

2334 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2335 # where they appear in the expression, and how we get the timespan into 

2336 # the expression. 

2337 

2338 # t1 is before the start of i1, so this should not include i1. 

2339 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2340 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2341 # should not include i2. 

2342 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2343 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2344 # t3 is in the middle of i3, so this should include i3. 

2345 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2346 # This one should not include i3 by the same reasoning.

2347 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2348 # t4 is exactly at the end of i4, so this should include i4. 

2349 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2350 # i4's upper bound of t4 is exclusive, so this should not include i4.

2351 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2352 

2353 # Now some timespan vs. time scalar queries. 

2354 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2355 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2356 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2357 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2358 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2359 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2360 

2361 # Empty timespans should not overlap anything. 

2362 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 
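# Editorial sketch (an addition; an illustrative model of the half-open
# overlap rule the assertions above rely on): timespans are [begin, end),
# so two non-empty ranges overlap iff each begins before the other ends; a
# None bound is unbounded on that side, and an empty range overlaps nothing.
@staticmethod
def _overlapsSketch(a_begin, a_end, b_begin, b_end) -> bool:
    # An empty range (begin >= end) overlaps nothing, matching the last
    # assertion above.
    for begin, end in ((a_begin, a_end), (b_begin, b_end)):
        if begin is not None and end is not None and begin >= end:
            return False
    a_starts_before_b_ends = a_begin is None or b_end is None or a_begin < b_end
    b_starts_before_a_ends = b_begin is None or a_end is None or b_begin < a_end
    return a_starts_before_b_ends and b_starts_before_a_ends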

2363 

2364 def testCollectionSummaries(self): 

2365 """Test recording and retrieval of collection summaries.""" 

2366 self.maxDiff = None 

2367 registry = self.makeRegistry() 

2368 # Importing datasets from yaml should go through the code path where 

2369 # we update collection summaries as we insert datasets. 

2370 self.loadData(registry, "base.yaml") 

2371 self.loadData(registry, "datasets.yaml") 

2372 flat = registry.getDatasetType("flat") 

2373 expected1 = CollectionSummary.makeEmpty(registry.dimensions) 

2374 expected1.datasetTypes.add(registry.getDatasetType("bias")) 

2375 expected1.datasetTypes.add(flat) 

2376 expected1.dimensions.update_extract( 

2377 DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions) 

2378 ) 

2379 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2380 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2381 # Create a chained collection with both of the imported runs; the 

2382 # summary should be the same, because it's a union with itself. 

2383 chain = "chain" 

2384 registry.registerCollection(chain, CollectionType.CHAINED) 

2385 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2386 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2387 # Associate flats only into a tagged collection and a calibration 

2388 # collection to check summaries of those. 

2389 tag = "tag" 

2390 registry.registerCollection(tag, CollectionType.TAGGED) 

2391 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2392 calibs = "calibs" 

2393 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2394 registry.certify( 

2395 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2396 ) 

2397 expected2 = expected1.copy() 

2398 expected2.datasetTypes.discard("bias") 

2399 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2400 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2401 # Explicitly calling Registry.refresh() should load those same 

2402 # summaries, via a totally different code path. 

2403 registry.refresh() 

2404 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2405 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2406 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2407 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2408 

2409 def testBindInQueryDatasets(self): 

2410 """Test that the bind parameter is correctly forwarded in 

2411 queryDatasets recursion. 

2412 """ 

2413 registry = self.makeRegistry() 

2414 # Load registry contents from YAML so there are datasets in both 

2415 # imported runs to query against. 

2416 self.loadData(registry, "base.yaml") 

2417 self.loadData(registry, "datasets.yaml") 

2418 self.assertEqual( 

2419 set(registry.queryDatasets("flat", band="r", collections=...)), 

2420 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2421 ) 
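
# collections=... (Ellipsis) searches all collections, and the bind mapping 

# supplies the literal for the identifier used in the where string, avoiding 

# interpolation of values into the expression itself. A minimal sketch with 

# a second, purely illustrative bind parameter: 

# 

#     registry.queryDatasets( 

#         "flat", 

#         where="band = my_band AND detector = my_det", 

#         bind={"my_band": "r", "my_det": 2}, 

#         collections=..., 

#     ) 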

2422 

2423 def testQueryResultSummaries(self): 

2424 """Test summary methods like `count`, `any`, and `explain_no_results` 

2425 on `DataCoordinateQueryResults` and `DatasetQueryResults`. 

2426 """ 

2427 registry = self.makeRegistry() 

2428 self.loadData(registry, "base.yaml") 

2429 self.loadData(registry, "datasets.yaml") 

2430 self.loadData(registry, "spatial.yaml") 

2431 # Default test dataset has two collections, each with both flats and 

2432 # biases. Add a new collection with only biases. 

2433 registry.registerCollection("biases", CollectionType.TAGGED) 

2434 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2435 # First query yields two results, and involves no postprocessing. 

2436 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2437 self.assertTrue(query1.any(execute=False, exact=False)) 

2438 self.assertTrue(query1.any(execute=True, exact=False)) 

2439 self.assertTrue(query1.any(execute=True, exact=True)) 

2440 self.assertEqual(query1.count(exact=False), 2) 

2441 self.assertEqual(query1.count(exact=True), 2) 

2442 self.assertFalse(list(query1.explain_no_results())) 
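
# In these checks, execute=False asks any() to answer without running a 

# query (so it may report false positives), while exact=True runs the full 

# query, including any postprocessing filters. 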

2443 # Second query should yield no results, but this isn't detectable 

2444 # unless we actually run a query. 

2445 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2446 self.assertTrue(query2.any(execute=False, exact=False)) 

2447 self.assertFalse(query2.any(execute=True, exact=False)) 

2448 self.assertFalse(query2.any(execute=True, exact=True)) 

2449 self.assertEqual(query2.count(exact=False), 0) 

2450 self.assertEqual(query2.count(exact=True), 0) 

2451 self.assertFalse(list(query2.explain_no_results())) 

2452 # These queries yield no results due to various problems that can be 

2453 # spotted prior to execution, yielding helpful diagnostics. 

2454 base_query = registry.queryDataIds(["detector", "physical_filter"]) 

2455 for query, snippets in [ 

2456 ( 

2457 # Dataset type name doesn't match any existing dataset types. 

2458 registry.queryDatasets("nonexistent", collections=...), 

2459 ["nonexistent"], 

2460 ), 

2461 ( 

2462 # Dataset type name doesn't match any existing dataset types. 

2463 base_query.findDatasets("nonexistent", collections=["biases"]), 

2464 ["nonexistent"], 

2465 ), 

2466 ( 

2467 # Dataset type name doesn't match any existing dataset types. 

2468 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...), 

2469 ["nonexistent"], 

2470 ), 

2471 ( 

2472 # Dataset type object isn't registered. 

2473 registry.queryDatasets( 

2474 DatasetType( 

2475 "nonexistent", 

2476 dimensions=["instrument"], 

2477 universe=registry.dimensions, 

2478 storageClass="Image", 

2479 ), 

2480 collections=..., 

2481 ), 

2482 ["nonexistent"], 

2483 ), 

2484 ( 

2485 # Dataset type object isn't registered. 

2486 base_query.findDatasets( 

2487 DatasetType( 

2488 "nonexistent", 

2489 dimensions=["instrument"], 

2490 universe=registry.dimensions, 

2491 storageClass="Image", 

2492 ), 

2493 collections=["biases"], 

2494 ), 

2495 ["nonexistent"], 

2496 ), 

2497 ( 

2498 # No datasets of this type in this collection. 

2499 registry.queryDatasets("flat", collections=["biases"]), 

2500 ["flat", "biases"], 

2501 ), 

2502 ( 

2503 # No datasets of this type in this collection. 

2504 base_query.findDatasets("flat", collections=["biases"]), 

2505 ["flat", "biases"], 

2506 ), 

2507 ( 

2508 # No collections matching at all. 

2509 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2510 ["potato"], 

2511 ), 

2512 ( 

2513 # Dataset type name doesn't match any existing dataset types. 

2514 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...), 

2515 ["nonexistent"], 

2516 ), 

2517 ]: 

2518 

2519 self.assertFalse(query.any(execute=False, exact=False)) 

2520 self.assertFalse(query.any(execute=True, exact=False)) 

2521 self.assertFalse(query.any(execute=True, exact=True)) 

2522 self.assertEqual(query.count(exact=False), 0) 

2523 self.assertEqual(query.count(exact=True), 0) 

2524 messages = list(query.explain_no_results()) 

2525 self.assertTrue(messages) 

2526 # Want all expected snippets to appear in at least one message. 

2527 self.assertTrue( 

2528 any( 

2529 all(snippet in message for snippet in snippets) for message in messages 

2530 ), 

2531 messages, 

2532 ) 

2533 

2534 # These queries yield no results due to problems that can be identified 

2535 # by cheap follow-up queries, yielding helpful diagnostics. 

2536 for query, snippets in [ 

2537 ( 

2538 # No records for one of the involved dimensions. 

2539 registry.queryDataIds(["subfilter"]), 

2540 ["dimension records", "subfilter"], 

2541 ), 

2542 ( 

2543 # No records for one of the involved dimensions. 

2544 registry.queryDimensionRecords("subfilter"), 

2545 ["dimension records", "subfilter"], 

2546 ), 

2547 ]: 

2548 self.assertFalse(query.any(execute=True, exact=False)) 

2549 self.assertFalse(query.any(execute=True, exact=True)) 

2550 self.assertEqual(query.count(exact=True), 0) 

2551 messages = list(query.explain_no_results()) 

2552 self.assertTrue(messages) 

2553 # Want all expected snippets to appear in at least one message. 

2554 self.assertTrue( 

2555 any( 

2556 all(snippet in message for snippet in snippets) for message in messages 

2557 ), 

2558 messages, 

2559 ) 

2560 

2561 # This query yields four overlaps in the database, but one is filtered 

2562 # out in postprocessing. The count queries aren't accurate because 

2563 # they don't account for duplication that happens due to an internal 

2564 # join against commonSkyPix. 

2565 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2566 self.assertEqual( 

2567 { 

2568 DataCoordinate.standardize( 

2569 instrument="Cam1", 

2570 skymap="SkyMap1", 

2571 visit=v, 

2572 tract=t, 

2573 universe=registry.dimensions, 

2574 ) 

2575 for v, t in [(1, 0), (2, 0), (2, 1)] 

2576 }, 

2577 set(query3), 

2578 ) 

2579 self.assertTrue(query3.any(execute=False, exact=False)) 

2580 self.assertTrue(query3.any(execute=True, exact=False)) 

2581 self.assertTrue(query3.any(execute=True, exact=True)) 

2582 self.assertGreaterEqual(query3.count(exact=False), 4) 

2583 self.assertGreaterEqual(query3.count(exact=True), 3) 
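
# Even the exact count may exceed the three distinct data IDs, because 

# counting does not deduplicate rows produced by the commonSkyPix join. 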

2584 self.assertFalse(list(query3.explain_no_results())) 

2585 # This query yields overlaps in the database, but all are filtered 

2586 # out in postprocessing. The count queries again aren't very useful. 

2587 # We have to use `where=` here to avoid an optimization that 

2588 # (currently) skips the spatial postprocess-filtering because it 

2589 # recognizes that no spatial join is necessary. That's not ideal, but 

2590 # fixing it is out of scope for this ticket. 

2591 query4 = registry.queryDataIds( 

2592 ["visit", "tract"], 

2593 instrument="Cam1", 

2594 skymap="SkyMap1", 

2595 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2596 ) 

2597 self.assertFalse(set(query4)) 

2598 self.assertTrue(query4.any(execute=False, exact=False)) 

2599 self.assertTrue(query4.any(execute=True, exact=False)) 

2600 self.assertFalse(query4.any(execute=True, exact=True)) 

2601 self.assertGreaterEqual(query4.count(exact=False), 1) 

2602 self.assertEqual(query4.count(exact=True), 0) 

2603 messages = list(query4.explain_no_results()) 

2604 self.assertTrue(messages) 

2605 self.assertTrue(any("regions did not overlap" in message for message in messages)) 

2606 

2607 # Finally, some queries yield no results for reasons we do not yet know 

2608 # how to diagnose, so explain_no_results() reports nothing. 

2609 query5 = registry.queryDimensionRecords( 

2610 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1" 

2611 ) 

2612 self.assertEqual(query5.count(exact=True), 0) 

2613 messages = list(query5.explain_no_results()) 

2614 self.assertFalse(messages) 

2615 

2616 def testQueryDataIdsOrderBy(self): 

2617 """Test order_by and limit on result returned by queryDataIds().""" 

2618 registry = self.makeRegistry() 

2619 self.loadData(registry, "base.yaml") 

2620 self.loadData(registry, "datasets.yaml") 

2621 self.loadData(registry, "spatial.yaml") 

2622 

2623 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

2624 return registry.queryDataIds( 

2625 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

2626 ) 

2627 

2628 Test = namedtuple( 

2629 "testQueryDataIdsOrderByTest", 

2630 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

2631 defaults=(None, None, None), 

2632 ) 
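
# defaults=(None, None, None) applies to the rightmost fields, so limit, 

# datasets, and collections are all optional. limit tuples are splatted 

# into query.limit(), which takes a row limit and an optional offset; e.g. 

# limit=(3, 3) keeps at most three rows after skipping the first three. 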

2633 

2634 test_data = ( 

2635 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2636 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

2637 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

2638 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

2639 Test( 

2640 "tract.id,visit.id", 

2641 "tract,visit", 

2642 ((0, 1), (0, 1), (0, 2)), 

2643 limit=(3,), 

2644 ), 

2645 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

2646 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

2647 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

2648 Test( 

2649 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

2650 ), 

2651 Test( 

2652 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

2653 ), 

2654 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2655 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2656 Test( 

2657 "tract,-timespan.begin,timespan.end", 

2658 "tract,visit", 

2659 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

2660 ), 

2661 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

2662 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

2663 Test( 

2664 "tract,detector", 

2665 "tract,detector", 

2666 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2667 datasets="flat", 

2668 collections="imported_r", 

2669 ), 

2670 Test( 

2671 "tract,detector.full_name", 

2672 "tract,detector", 

2673 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2674 datasets="flat", 

2675 collections="imported_r", 

2676 ), 

2677 Test( 

2678 "tract,detector.raft,detector.name_in_raft", 

2679 "tract,detector", 

2680 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2681 datasets="flat", 

2682 collections="imported_r", 

2683 ), 

2684 ) 

2685 

2686 for test in test_data: 

2687 order_by = test.order_by.split(",") 

2688 keys = test.keys.split(",") 

2689 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

2690 if test.limit is not None: 

2691 query = query.limit(*test.limit) 

2692 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

2693 self.assertEqual(dataIds, test.result) 

2694 

2695 # and materialize 
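
# materialize() executes the query into a temporary table; the requested 

# ordering and limit must survive that round trip. 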

2696 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

2697 if test.limit is not None: 

2698 query = query.limit(*test.limit) 

2699 with query.materialize() as materialized: 

2700 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in materialized) 

2701 self.assertEqual(dataIds, test.result) 

2702 

2703 # errors in a name 

2704 for order_by in ("", "-"): 

2705 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2706 list(do_query().order_by(order_by)) 

2707 

2708 for order_by in ("undimension.name", "-undimension.name"): 

2709 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"): 

2710 list(do_query().order_by(order_by)) 

2711 

2712 for order_by in ("attract", "-attract"): 

2713 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

2714 list(do_query().order_by(order_by)) 

2715 

2716 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

2717 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

2718 

2719 with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimesion"): 

2720 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

2721 

2722 with self.assertRaisesRegex( 

2723 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

2724 ): 

2725 list(do_query(("tract")).order_by("timespan.begin")) 

2726 

2727 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

2728 list(do_query(("tract")).order_by("tract.timespan.begin")) 

2729 

2730 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

2731 list(do_query(("tract")).order_by("tract.name")) 

2732 

2733 def testQueryDataIdsGovernorExceptions(self): 

2734 """Test exceptions raised by queryDataIds() for incorrect governors.""" 

2735 registry = self.makeRegistry() 

2736 self.loadData(registry, "base.yaml") 

2737 self.loadData(registry, "datasets.yaml") 

2738 self.loadData(registry, "spatial.yaml") 

2739 

2740 def do_query(dimensions, dataId=None, where=None, bind=None, **kwargs): 

2741 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs) 

2742 

2743 Test = namedtuple( 

2744 "testQueryDataIdExceptionsTest", 

2745 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"), 

2746 defaults=(None, None, None, {}, None, 0), 

2747 ) 

2748 

2749 test_data = ( 

2750 Test("tract,visit", count=6), 

2751 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2752 Test( 

2753 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError 

2754 ), 

2755 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2756 Test( 

2757 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError 

2758 ), 

2759 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6), 

2760 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError), 

2761 Test( 

2762 "tract,visit", 

2763 where="instrument=cam AND skymap=map", 

2764 bind={"cam": "Cam1", "map": "SkyMap1"}, 

2765 count=6, 

2766 ), 

2767 Test( 

2768 "tract,visit", 

2769 where="instrument=cam AND skymap=map", 

2770 bind={"cam": "Cam", "map": "SkyMap"}, 

2771 exception=DataIdValueError, 

2772 ), 

2773 ) 
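
# Unknown governor values (Cam2, SkyMap2, SkyMap5, ...) should raise 

# DataIdValueError whether they arrive via kwargs, dataId, a where 

# string, or bind parameters. 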

2774 

2775 for test in test_data: 

2776 dimensions = test.dimensions.split(",") 

2777 if test.exception: 

2778 with self.assertRaises(test.exception): 

2779 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() 

2780 else: 

2781 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2782 self.assertEqual(query.count(), test.count) 

2783 

2784 # and materialize 

2785 if test.exception: 

2786 with self.assertRaises(test.exception): 

2787 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2788 with query.materialize() as materialized: 

2789 materialized.count() 

2790 else: 

2791 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2792 with query.materialize() as materialized: 

2793 self.assertEqual(materialized.count(), test.count) 

2794 

2795 def testQueryDimensionRecordsOrderBy(self): 

2796 """Test order_by and limit on result returned by 

2797 queryDimensionRecords(). 

2798 """ 

2799 registry = self.makeRegistry() 

2800 self.loadData(registry, "base.yaml") 

2801 self.loadData(registry, "datasets.yaml") 

2802 self.loadData(registry, "spatial.yaml") 

2803 

2804 def do_query(element, datasets=None, collections=None): 

2805 return registry.queryDimensionRecords( 

2806 element, instrument="Cam1", datasets=datasets, collections=collections 

2807 ) 

2808 

2809 query = do_query("detector") 

2810 self.assertEqual(len(list(query)), 4) 

2811 

2812 Test = namedtuple( 

2813 "testQueryDataIdsOrderByTest", 

2814 ("element", "order_by", "result", "limit", "datasets", "collections"), 

2815 defaults=(None, None, None), 

2816 ) 

2817 

2818 test_data = ( 

2819 Test("detector", "detector", (1, 2, 3, 4)), 

2820 Test("detector", "-detector", (4, 3, 2, 1)), 

2821 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)), 

2822 Test("detector", "-detector.purpose", (4,), limit=(1,)), 

2823 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)), 

2824 Test("visit", "visit", (1, 2)), 

2825 Test("visit", "-visit.id", (2, 1)), 

2826 Test("visit", "zenith_angle", (1, 2)), 

2827 Test("visit", "-visit.name", (2, 1)), 

2828 Test("visit", "day_obs,-timespan.begin", (2, 1)), 

2829 ) 
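
# Field names may be given bare (e.g. "purpose") or qualified with the 

# element (e.g. "detector.purpose"); both spellings are exercised above. 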

2830 

2831 for test in test_data: 

2832 order_by = test.order_by.split(",") 

2833 query = do_query(test.element).order_by(*order_by) 

2834 if test.limit is not None: 

2835 query = query.limit(*test.limit) 

2836 dataIds = tuple(rec.id for rec in query) 

2837 self.assertEqual(dataIds, test.result) 

2838 

2839 # errors in a name 

2840 for order_by in ("", "-"): 

2841 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2842 list(do_query("detector").order_by(order_by)) 

2843 

2844 for order_by in ("undimension.name", "-undimension.name"): 

2845 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"): 

2846 list(do_query("detector").order_by(order_by)) 

2847 

2848 for order_by in ("attract", "-attract"): 

2849 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."): 

2850 list(do_query("detector").order_by(order_by)) 

2851 

2852 def testQueryDimensionRecordsExceptions(self): 

2853 """Test exceptions raised by queryDimensionRecords().""" 

2854 registry = self.makeRegistry() 

2855 self.loadData(registry, "base.yaml") 

2856 self.loadData(registry, "datasets.yaml") 

2857 self.loadData(registry, "spatial.yaml") 

2858 

2859 result = registry.queryDimensionRecords("detector") 

2860 self.assertEqual(result.count(), 4) 

2861 result = registry.queryDimensionRecords("detector", instrument="Cam1") 

2862 self.assertEqual(result.count(), 4) 

2863 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"}) 

2864 self.assertEqual(result.count(), 4) 

2865 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'") 

2866 self.assertEqual(result.count(), 4) 

2867 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"}) 

2868 self.assertEqual(result.count(), 4) 

2869 

2870 with self.assertRaisesRegex( 

2871 DataIdValueError, "Could not fetch record for required dimension instrument" 

2872 ): 

2873 registry.queryDimensionRecords("detector", instrument="NotCam1") 

2874 

2875 with self.assertRaisesRegex( 

2876 DataIdValueError, "Could not fetch record for required dimension instrument" 

2877 ): 

2878 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) 

2879 

2880 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

2881 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") 

2882 result.count() 

2883 

2884 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

2885 result = registry.queryDimensionRecords( 

2886 "detector", where="instrument=instr", bind={"instr": "NotCam1"} 

2887 ) 

2888 result.count() 
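
# Bad governor values passed via kwargs or dataId raise as soon as the 

# query is constructed; values embedded in a where expression (directly 

# or via bind) are only checked when the query runs, hence the count() 

# calls inside the assertion contexts above. 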

2889 

2890 def testDatasetConstrainedDimensionRecordQueries(self): 

2891 """Test that queryDimensionRecords works even when given a dataset 

2892 constraint whose dimensions extend beyond the requested dimension 

2893 element's. 

2894 """ 

2895 registry = self.makeRegistry() 

2896 self.loadData(registry, "base.yaml") 

2897 self.loadData(registry, "datasets.yaml") 

2898 # Query for physical_filter dimension records, using a dataset that 

2899 # has both physical_filter and detector dimensions. 

2900 records = registry.queryDimensionRecords( 

2901 "physical_filter", 

2902 datasets=["flat"], 

2903 collections="imported_r", 

2904 ) 

2905 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"}) 

2906 

2907 def testSkyPixDatasetQueries(self): 

2908 """Test that we can build queries involving skypix dimensions as long 

2909 as a dataset type that uses those dimensions is included. 

2910 """ 

2911 registry = self.makeRegistry() 

2912 self.loadData(registry, "base.yaml") 

2913 dataset_type = DatasetType( 

2914 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int" 

2915 ) 

2916 registry.registerDatasetType(dataset_type) 

2917 run = "r" 

2918 registry.registerRun(run) 

2919 # First try queries where there are no datasets; the concern is whether 

2920 # we can even build and execute these queries without raising, even 

2921 # when "doomed" query shortcuts are in play. 

2922 self.assertFalse( 

2923 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)) 

2924 ) 

2925 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run))) 

2926 # Now add a dataset and see that we can get it back. 

2927 htm7 = registry.dimensions.skypix["htm"][7].pixelization 

2928 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0]) 
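
# universe() returns the full-sky RangeSet for the pixelization; [0][0] 

# takes the first pixel index of its first range. 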

2929 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run) 

2930 self.assertEqual( 

2931 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)), 

2932 {data_id}, 

2933 ) 

2934 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref})