# Source: python/lsst/daf/butler/registry/tests/_registry.py

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    Timespan,
    ddl,
)
from .._collection_summary import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum

if TYPE_CHECKING:
    from .._registry import Registry

class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create the `RegistryConfig` used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need only the default configuration should
        instantiate `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config
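    # For example, a subclass might pin the UUID-based datasets manager
    # (illustrative dotted path, assuming the ByDimensions managers shipped
    # with daf_butler):
    #
    #     datasetsManager = (
    #         "lsst.daf.butler.registry.datasets.byDimensions."
    #         "ByDimensionsDatasetRecordStorageManagerUUID"
    #     )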

    @abstractmethod
    def makeRegistry(self, share_repo_with: Optional[Registry] = None) -> Optional[Registry]:
        """Return the `Registry` instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with` is
            not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()
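    # A minimal sketch of a concrete subclass (assuming an SQLite-backed
    # registry; the names below are illustrative, not part of this module):
    #
    #     class SqliteRegistryTests(RegistryTests, unittest.TestCase):
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data", "registry")
    #
    #         def makeRegistry(self, share_repo_with=None):
    #             if share_repo_with is not None:
    #                 return None  # cannot share an in-memory SQLite repo
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"
    #             return Registry.createFromConfig(config)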

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)
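            # datastore=None is deliberate: these YAML files carry registry
            # content only (dimension records, dataset types, and datasets),
            # with no datastore artifacts to transfer.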

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
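        # count() and any() are checked explicitly because lazy results
        # objects may implement them with cheaper queries than full
        # iteration would use.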

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters.  SQLite documents the limit as 32k, but in
        # practice it appears to be much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, and the second has matching elements in different
        # batches (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
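        # Constraints on different fields are ANDed together, so id=1
        # combined with name="two" matches no rows.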

        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset IDs."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # The UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All the different failure modes
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test the non-unique ID generation modes; such datasets can be
        # re-imported multiple times.
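        # DATAID_TYPE derives a deterministic UUID5 from the dataset type and
        # data ID only, while DATAID_TYPE_RUN also folds in the run name (so
        # the same data ID can receive distinct IDs in different runs); the
        # assertions below exercise exactly that difference.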

        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Use an integer dataset ID to force UUID calculation in
                # _importDatasets
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                (ref1,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to a different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run + 1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run + 1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
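        # Component dataset types use the "parent.component" naming
        # convention (e.g. "bias.wcs"), which the patterns below rely on.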

        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "flat", "bias.wcs", "flat.photoCalib"},
                NamedValueSet(registry.queryDatasetTypes(components=True)).names,
            )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
            )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
            )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
            )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={
                "data1": registry.storageClasses.getStorageClass("StructuredDataDict"),
                "data2": registry.storageClasses.getStorageClass("StructuredDataDict"),
            },
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertWarns(FutureWarning):
            with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
                everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data1", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data1" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data1", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        with self.assertWarns(FutureWarning):
            dataIds = registry.queryDataIds(
                ["detector"],
                datasets=["bias.wcs"],
                collections=collection,
            ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        with self.assertWarns(FutureWarning):
            childRefs2 = set(
                registry.queryDatasets(
                    "bias.wcs",
                    collections=collection,
                )
            )
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2}, {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time along
        # with a dataset that isn't yet in the collection and won't cause a
        # conflict.  This should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
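        # A CHAINED collection is an ordered search path over other
        # collections: findDataset consults each child in turn and returns
        # the first match.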

        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # The chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # Searching for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Searching for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2; it should be found in chain2 via
        # its first child, run2, and match the direct search in run2.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """

        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins
        to skymap.
        """
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for this test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twenty", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the different collections,
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dim string works as well as a list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
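            # Data ID packers encode a data ID into a single integer and must
            # round-trip; the visit_detector and exposure_detector packers
            # are expected to disagree on the same data ID here.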

            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions),
            )
            self.assertEqual(
                packer2.unpack(packer2.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer2.dimensions),
            )
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input collections
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to a single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # Calling queryDataIds with only one of `datasets` and `collections`
        # is an error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression that excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter: it is not in `dimensions`, but it is
        # a part of the full expression, so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8")))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to a single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # Specifying a non-existing skymap is an exception
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()

1167 def testSpatialJoin(self): 

1168 """Test queries that involve spatial overlap joins.""" 

1169 registry = self.makeRegistry() 

1170 self.loadData(registry, "hsc-rc2-subset.yaml") 

1171 

1172 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of 

1173 # the TopologicalFamily they belong to. We'll relate all elements in 

1174 # each family to all of the elements in each other family. 

1175 families = defaultdict(set) 

1176 # Dictionary of {element.name: {dataId: region}}. 

1177 regions = {} 

1178 for element in registry.dimensions.getDatabaseElements(): 

1179 if element.spatial is not None: 

1180 families[element.spatial.name].add(element) 

1181 regions[element.name] = { 

1182 record.dataId: record.region for record in registry.queryDimensionRecords(element) 

1183 } 

1184 

1185 # If this check fails, it's not necessarily a problem - it may just be 

1186 # a reasonable change to the default dimension definitions - but the 

1187 # test below depends on there being more than one family to do anything 

1188 # useful. 

1189 self.assertEqual(len(families), 2) 

1190 

1191 # Overlap DatabaseDimensionElements with each other. 

1192 for family1, family2 in itertools.combinations(families, 2): 

1193 for element1, element2 in itertools.product(families[family1], families[family2]): 

1194 graph = DimensionGraph.union(element1.graph, element2.graph) 

1195 # Construct expected set of overlapping data IDs via a 

1196 # brute-force comparison of the regions we've already fetched. 

1197 expected = { 

1198 DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph) 

1199 for (dataId1, region1), (dataId2, region2) in itertools.product( 

1200 regions[element1.name].items(), regions[element2.name].items() 

1201 ) 

1202 if not region1.isDisjointFrom(region2) 

1203 } 

1204 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1205 queried = set(registry.queryDataIds(graph)) 

1206 self.assertEqual(expected, queried) 

1207 

1208 # Overlap each DatabaseDimensionElement with the commonSkyPix system. 

1209 commonSkyPix = registry.dimensions.commonSkyPix 

1210 for elementName, element_regions in regions.items(): 

1211 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph) 

1212 expected = set() 

1213 for dataId, region in element_regions.items(): 

1214 for begin, end in commonSkyPix.pixelization.envelope(region): 

1215 expected.update( 

1216 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph) 

1217 for index in range(begin, end) 

1218 ) 

1219 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1220 queried = set(registry.queryDataIds(graph)) 

1221 self.assertEqual(expected, queried) 

1222 
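# A short sketch of the primitives these joins are built on, assuming any
# sphgeom region ``r`` (e.g. one of the values stored in ``regions``
# above). ``envelope`` is conservative, so an exact ``isDisjointFrom``
# test is still needed:
#
#     pix = registry.dimensions.commonSkyPix.pixelization
#     candidates = [
#         index
#         for begin, end in pix.envelope(r)
#         for index in range(begin, end)
#     ]
#     overlapping = [
#         i for i in candidates if not pix.triangle(i).isDisjointFrom(r)
#     ]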

1223 def testAbstractQuery(self): 

1224 """Test that we can run a query that just lists the known 

1225 bands. This is tricky because band is 

1226 backed by a query against physical_filter. 

1227 """ 

1228 registry = self.makeRegistry() 

1229 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1230 registry.insertDimensionData( 

1231 "physical_filter", 

1232 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1233 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1234 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1235 ) 

1236 rows = registry.queryDataIds(["band"]).toSet() 

1237 self.assertCountEqual( 

1238 rows, 

1239 [ 

1240 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1241 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1242 ], 

1243 ) 

1244 

1245 def testAttributeManager(self): 

1246 """Test basic functionality of attribute manager.""" 

1247 # Number of attributes with schema versions in a fresh database: 

1248 # 6 managers with 3 records per manager, plus the config for dimensions. 

1249 VERSION_COUNT = 6 * 3 + 1 

1250 

1251 registry = self.makeRegistry() 

1252 attributes = registry._managers.attributes 

1253 

1254 # check what get() returns for non-existing key 

1255 self.assertIsNone(attributes.get("attr")) 

1256 self.assertEqual(attributes.get("attr", ""), "") 

1257 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1258 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1259 

1260 # cannot store empty key or value 

1261 with self.assertRaises(ValueError): 

1262 attributes.set("", "value") 

1263 with self.assertRaises(ValueError): 

1264 attributes.set("attr", "") 

1265 

1266 # set value of non-existing key 

1267 attributes.set("attr", "value") 

1268 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1269 self.assertEqual(attributes.get("attr"), "value") 

1270 

1271 # update value of existing key 

1272 with self.assertRaises(ButlerAttributeExistsError): 

1273 attributes.set("attr", "value2") 

1274 

1275 attributes.set("attr", "value2", force=True) 

1276 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1277 self.assertEqual(attributes.get("attr"), "value2") 

1278 

1279 # delete existing key 

1280 self.assertTrue(attributes.delete("attr")) 

1281 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1282 

1283 # delete non-existing key 

1284 self.assertFalse(attributes.delete("non-attr")) 

1285 

1286 # store bunch of keys and get the list back 

1287 data = [ 

1288 ("version.core", "1.2.3"), 

1289 ("version.dimensions", "3.2.1"), 

1290 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1291 ] 

1292 for key, value in data: 

1293 attributes.set(key, value) 

1294 items = dict(attributes.items()) 

1295 for key, value in data: 

1296 self.assertEqual(items[key], value) 

1297 
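# A minimal helper sketch (hypothetical, not used by the tests) layering
# "set or update" on top of this manager, relying on the
# ButlerAttributeExistsError / force=True behavior exercised above:
#
#     def upsert_attribute(attributes, key: str, value: str) -> None:
#         try:
#             attributes.set(key, value)
#         except ButlerAttributeExistsError:
#             # The key already exists; overwrite it explicitly.
#             attributes.set(key, value, force=True)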

1298 def testQueryDatasetsDeduplication(self): 

1299 """Test that the findFirst option to queryDatasets selects datasets 

1300 from collections in the order given". 

1301 """ 

1302 registry = self.makeRegistry() 

1303 self.loadData(registry, "base.yaml") 

1304 self.loadData(registry, "datasets.yaml") 

1305 self.assertCountEqual( 

1306 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1307 [ 

1308 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1309 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1310 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1311 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1312 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1313 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1314 ], 

1315 ) 

1316 self.assertCountEqual( 

1317 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1318 [ 

1319 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1320 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1321 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1322 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1323 ], 

1324 ) 

1325 self.assertCountEqual( 

1326 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1327 [ 

1328 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1329 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1330 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1331 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1332 ], 

1333 ) 

1334 
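# A pure-Python sketch (hypothetical names) of the find-first rule the
# assertions above encode: walk the collections in the order given and
# keep, for each data ID, only the first dataset found:
#
#     def find_first(refs_by_collection, collection_order):
#         results = {}
#         for collection in collection_order:
#             for ref in refs_by_collection[collection]:
#                 results.setdefault(ref.dataId, ref)
#         return list(results.values())
#
# With biases keyed by "imported_g" and "imported_r" as above,
# find_first(..., ["imported_g", "imported_r"]) keeps detectors 1-3 from
# imported_g and only detector 4 from imported_r, matching the second
# assertion.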

1335 def testQueryResults(self): 

1336 """Test querying for data IDs and then manipulating the QueryResults 

1337 object returned to perform other queries. 

1338 """ 

1339 registry = self.makeRegistry() 

1340 self.loadData(registry, "base.yaml") 

1341 self.loadData(registry, "datasets.yaml") 

1342 bias = registry.getDatasetType("bias") 

1343 flat = registry.getDatasetType("flat") 

1344 # Obtain expected results from methods other than those we're testing 

1345 # here. That includes: 

1346 # - the dimensions of the data IDs we want to query: 

1347 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1348 # - the dimensions of some other data IDs we'll extract from that: 

1349 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1350 # - the data IDs we expect to obtain from the first queries: 

1351 expectedDataIds = DataCoordinateSet( 

1352 { 

1353 DataCoordinate.standardize( 

1354 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1355 ) 

1356 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1357 }, 

1358 graph=expectedGraph, 

1359 hasFull=False, 

1360 hasRecords=False, 

1361 ) 

1362 # - the flat datasets we expect to find from those data IDs, in just 

1363 # one collection (so deduplication is irrelevant): 

1364 expectedFlats = [ 

1365 registry.findDataset( 

1366 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1367 ), 

1368 registry.findDataset( 

1369 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1370 ), 

1371 registry.findDataset( 

1372 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1373 ), 

1374 ] 

1375 # - the data IDs we expect to extract from that: 

1376 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1377 # - the bias datasets we expect to find from those data IDs, after we 

1378 # subset-out the physical_filter dimension, both with duplicates: 

1379 expectedAllBiases = [ 

1380 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1381 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1382 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1383 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1384 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1385 ] 

1386 # - ...and without duplicates: 

1387 expectedDeduplicatedBiases = [ 

1388 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1389 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1390 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1391 ] 

1392 # Test against those expected results, using a "lazy" query for the 

1393 # data IDs (which re-executes that query each time we use it to do 

1394 # something new). 

1395 dataIds = registry.queryDataIds( 

1396 ["detector", "physical_filter"], 

1397 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1398 instrument="Cam1", 

1399 ) 

1400 self.assertEqual(dataIds.graph, expectedGraph) 

1401 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1402 self.assertCountEqual( 

1403 list( 

1404 dataIds.findDatasets( 

1405 flat, 

1406 collections=["imported_r"], 

1407 ) 

1408 ), 

1409 expectedFlats, 

1410 ) 

1411 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1412 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1413 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1414 self.assertCountEqual( 

1415 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1416 expectedAllBiases, 

1417 ) 

1418 self.assertCountEqual( 

1419 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1420 expectedDeduplicatedBiases, 

1421 ) 

1422 

1423 # Check that mismatched dataset type dimensions raise ValueError. 

1424 with self.assertRaises(ValueError): 

1425 subsetDataIds.findDatasets("flat", collections=["imported_r", "imported_g"], findFirst=True) 

1426 

1427 # Use a component dataset type. 

1428 self.assertCountEqual( 

1429 [ 

1430 ref.makeComponentRef("image") 

1431 for ref in subsetDataIds.findDatasets( 

1432 bias, 

1433 collections=["imported_r", "imported_g"], 

1434 findFirst=False, 

1435 ) 

1436 ], 

1437 [ref.makeComponentRef("image") for ref in expectedAllBiases], 

1438 ) 

1439 

1440 # Use a dataset type name and a dataset type object, neither of which 

1441 # is registered. 

1442 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure") 

1443 

1444 # Test both string name and dataset type object. 

1445 test_type: Union[str, DatasetType] 

1446 for test_type, test_type_name in ( 

1447 (unknown_type, unknown_type.name), 

1448 (unknown_type.name, unknown_type.name), 

1449 ): 

1450 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name): 

1451 list( 

1452 subsetDataIds.findDatasets( 

1453 test_type, collections=["imported_r", "imported_g"], findFirst=True 

1454 ) 

1455 ) 

1456 

1457 # Materialize the bias dataset queries (only) by putting the results 

1458 # into temporary tables, then repeat those tests. 

1459 with subsetDataIds.findDatasets( 

1460 bias, collections=["imported_r", "imported_g"], findFirst=False 

1461 ).materialize() as biases: 

1462 self.assertCountEqual(list(biases), expectedAllBiases) 

1463 with subsetDataIds.findDatasets( 

1464 bias, collections=["imported_r", "imported_g"], findFirst=True 

1465 ).materialize() as biases: 

1466 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1467 # Materialize the data ID subset query, but not the dataset queries. 

1468 with subsetDataIds.materialize() as subsetDataIds: 

1469 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1470 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1471 self.assertCountEqual( 

1472 list( 

1473 subsetDataIds.findDatasets( 

1474 bias, collections=["imported_r", "imported_g"], findFirst=False 

1475 ) 

1476 ), 

1477 expectedAllBiases, 

1478 ) 

1479 self.assertCountEqual( 

1480 list( 

1481 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1482 ), 

1483 expectedDeduplicatedBiases, 

1484 ) 

1485 # Materialize the dataset queries, too. 

1486 with subsetDataIds.findDatasets( 

1487 bias, collections=["imported_r", "imported_g"], findFirst=False 

1488 ).materialize() as biases: 

1489 self.assertCountEqual(list(biases), expectedAllBiases) 

1490 with subsetDataIds.findDatasets( 

1491 bias, collections=["imported_r", "imported_g"], findFirst=True 

1492 ).materialize() as biases: 

1493 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1494 # Materialize the original query, but none of the follow-up queries. 

1495 with dataIds.materialize() as dataIds: 

1496 self.assertEqual(dataIds.graph, expectedGraph) 

1497 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1498 self.assertCountEqual( 

1499 list( 

1500 dataIds.findDatasets( 

1501 flat, 

1502 collections=["imported_r"], 

1503 ) 

1504 ), 

1505 expectedFlats, 

1506 ) 

1507 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1508 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1509 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1510 self.assertCountEqual( 

1511 list( 

1512 subsetDataIds.findDatasets( 

1513 bias, collections=["imported_r", "imported_g"], findFirst=False 

1514 ) 

1515 ), 

1516 expectedAllBiases, 

1517 ) 

1518 self.assertCountEqual( 

1519 list( 

1520 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1521 ), 

1522 expectedDeduplicatedBiases, 

1523 ) 

1524 # Materialize just the bias dataset queries. 

1525 with subsetDataIds.findDatasets( 

1526 bias, collections=["imported_r", "imported_g"], findFirst=False 

1527 ).materialize() as biases: 

1528 self.assertCountEqual(list(biases), expectedAllBiases) 

1529 with subsetDataIds.findDatasets( 

1530 bias, collections=["imported_r", "imported_g"], findFirst=True 

1531 ).materialize() as biases: 

1532 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1533 # Materialize the subset data ID query, but not the dataset 

1534 # queries. 

1535 with subsetDataIds.materialize() as subsetDataIds: 

1536 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1537 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1538 self.assertCountEqual( 

1539 list( 

1540 subsetDataIds.findDatasets( 

1541 bias, collections=["imported_r", "imported_g"], findFirst=False 

1542 ) 

1543 ), 

1544 expectedAllBiases, 

1545 ) 

1546 self.assertCountEqual( 

1547 list( 

1548 subsetDataIds.findDatasets( 

1549 bias, collections=["imported_r", "imported_g"], findFirst=True 

1550 ) 

1551 ), 

1552 expectedDeduplicatedBiases, 

1553 ) 

1554 # Materialize the bias dataset queries, too, so now we're 

1555 # materializing every single step. 

1556 with subsetDataIds.findDatasets( 

1557 bias, collections=["imported_r", "imported_g"], findFirst=False 

1558 ).materialize() as biases: 

1559 self.assertCountEqual(list(biases), expectedAllBiases) 

1560 with subsetDataIds.findDatasets( 

1561 bias, collections=["imported_r", "imported_g"], findFirst=True 

1562 ).materialize() as biases: 

1563 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1564 
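# The materialize() pattern used throughout this test, in isolation: the
# context manager writes the current results into a temporary table, so
# follow-up queries read from that table instead of re-executing the
# original query. A minimal sketch, assuming the registry above:
#
#     dataIds = registry.queryDataIds(["detector"], instrument="Cam1")
#     with dataIds.materialize() as materialized:
#         refs = list(materialized.findDatasets("bias", collections=["imported_g"]))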

1565 def testEmptyDimensionsQueries(self): 

1566 """Test Query and QueryResults objects in the case where there are no 

1567 dimensions. 

1568 """ 

1569 # Set up test data: one dataset type, two runs, one dataset in each. 

1570 registry = self.makeRegistry() 

1571 self.loadData(registry, "base.yaml") 

1572 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1573 registry.registerDatasetType(schema) 

1574 dataId = DataCoordinate.makeEmpty(registry.dimensions) 

1575 run1 = "run1" 

1576 run2 = "run2" 

1577 registry.registerRun(run1) 

1578 registry.registerRun(run2) 

1579 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1580 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1581 # Query directly for both datasets together, then via find-first with each collection order. 

1582 self.checkQueryResults( 

1583 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1584 ) 

1585 self.checkQueryResults( 

1586 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1587 [dataset1], 

1588 ) 

1589 self.checkQueryResults( 

1590 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1591 [dataset2], 

1592 ) 

1593 # Query for data IDs with no dimensions. 

1594 dataIds = registry.queryDataIds([]) 

1595 self.checkQueryResults(dataIds, [dataId]) 

1596 # Use queried data IDs to find the datasets. 

1597 self.checkQueryResults( 

1598 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1599 [dataset1, dataset2], 

1600 ) 

1601 self.checkQueryResults( 

1602 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1603 [dataset1], 

1604 ) 

1605 self.checkQueryResults( 

1606 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1607 [dataset2], 

1608 ) 

1609 # Now materialize the data ID query results and repeat those tests. 

1610 with dataIds.materialize() as dataIds: 

1611 self.checkQueryResults(dataIds, [dataId]) 

1612 self.checkQueryResults( 

1613 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1614 [dataset1], 

1615 ) 

1616 self.checkQueryResults( 

1617 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1618 [dataset2], 

1619 ) 

1620 # Query for non-empty data IDs, then subset that to get the empty one. 

1621 # Repeat the above tests starting from that. 

1622 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1623 self.checkQueryResults(dataIds, [dataId]) 

1624 self.checkQueryResults( 

1625 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1626 [dataset1, dataset2], 

1627 ) 

1628 self.checkQueryResults( 

1629 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1630 [dataset1], 

1631 ) 

1632 self.checkQueryResults( 

1633 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1634 [dataset2], 

1635 ) 

1636 with dataIds.materialize() as dataIds: 

1637 self.checkQueryResults(dataIds, [dataId]) 

1638 self.checkQueryResults( 

1639 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1640 [dataset1, dataset2], 

1641 ) 

1642 self.checkQueryResults( 

1643 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1644 [dataset1], 

1645 ) 

1646 self.checkQueryResults( 

1647 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1648 [dataset2], 

1649 ) 

1650 # Query for non-empty data IDs, then materialize, then subset to get 

1651 # the empty one. Repeat again. 

1652 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1653 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1654 self.checkQueryResults(dataIds, [dataId]) 

1655 self.checkQueryResults( 

1656 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1657 [dataset1, dataset2], 

1658 ) 

1659 self.checkQueryResults( 

1660 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1661 [dataset1], 

1662 ) 

1663 self.checkQueryResults( 

1664 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1665 [dataset2], 

1666 ) 

1667 with dataIds.materialize() as dataIds: 

1668 self.checkQueryResults(dataIds, [dataId]) 

1669 self.checkQueryResults( 

1670 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1671 [dataset1, dataset2], 

1672 ) 

1673 self.checkQueryResults( 

1674 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1675 [dataset1], 

1676 ) 

1677 self.checkQueryResults( 

1678 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1679 [dataset2], 

1680 ) 

1681 # Query for non-empty data IDs with a constraint on an empty-data-ID 

1682 # dataset that exists. 

1683 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...) 

1684 self.checkQueryResults( 

1685 dataIds.subset(unique=True), 

1686 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)], 

1687 ) 

1688 # Again query for non-empty data IDs with a constraint on empty-data-ID 

1689 # datasets, but when the datasets don't exist. We delete the existing 

1690 # dataset and query just that collection rather than creating a new 

1691 # empty collection because this is a bit less likely for our build-time 

1692 # logic to shortcut-out (via the collection summaries), and such a 

1693 # shortcut would make this test a bit more trivial than we'd like. 

1694 registry.removeDatasets([dataset2]) 

1695 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2) 

1696 self.checkQueryResults(dataIds, []) 

1697 
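# The dimensionless pattern this test exercises, reduced to its core: a
# dataset type with empty dimensions has exactly one possible data ID.
# A minimal sketch (assumes a registry and a registered run "run1"):
#
#     schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
#     registry.registerDatasetType(schema)
#     empty_id = DataCoordinate.makeEmpty(registry.dimensions)
#     (ref,) = registry.insertDatasets(schema, dataIds=[empty_id], run="run1")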

1698 def testDimensionDataModifications(self): 

1699 """Test that modifying dimension records via: 

1700 syncDimensionData(..., update=True) and 

1701 insertDimensionData(..., replace=True) works as expected, even in the 

1702 presence of datasets using those dimensions and spatial overlap 

1703 relationships. 

1704 """ 

1705 

1706 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1707 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1708 for begin, end in ranges: 

1709 yield from range(begin, end) 

1710 

1711 def range_set_hull( 

1712 ranges: lsst.sphgeom.RangeSet, 

1713 pixelization: lsst.sphgeom.HtmPixelization, 

1714 ) -> lsst.sphgeom.ConvexPolygon: 

1715 """Create a ConvexPolygon hull of the region defined by a set of 

1716 HTM pixelization index ranges. 

1717 """ 

1718 points = [] 

1719 for index in unpack_range_set(ranges): 

1720 points.extend(pixelization.triangle(index).getVertices()) 

1721 return lsst.sphgeom.ConvexPolygon(points) 

1722 

1723 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1724 # and four child regions (the trixels within the parent at the next 

1725 # level). We'll use the parent as a tract/visit region and the children 

1726 # as its patch/visit_detector regions. 

1727 registry = self.makeRegistry() 

1728 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1729 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1730 index = 12288 

1731 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 

1732 assert htm6.universe().contains(child_ranges_small) 

1733 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)] 

1734 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1735 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1736 ) 

1737 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1738 # Make a larger version of each child region, defined as the convex hull 

1739 # of the htm6 trixels that overlap the original's bounding circle. Make a new 

1740 # parent that's the convex hull of the new children. 

1741 child_regions_large = [ 

1742 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1743 ] 

1744 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small)) 

1745 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1746 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1747 ) 

1748 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1749 assert parent_region_large.contains(parent_region_small) 

1750 assert not parent_region_small.contains(parent_region_large) 

1751 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1752 # Find some commonSkyPix indices that overlap the large regions but do 

1753 # not overlap the small regions. We use commonSkyPix here to make sure the 

1754 # real tests later involve what's in the database, not just post-query 

1755 # filtering of regions. 

1756 child_difference_indices = [] 

1757 for large, small in zip(child_regions_large, child_regions_small): 

1758 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1759 assert difference, "if this is empty, we can't test anything useful with these regions" 

1760 assert all( 

1761 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1762 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1763 for d in difference 

1764 ) 

1765 child_difference_indices.append(difference) 

1766 parent_difference_indices = list( 

1767 unpack_range_set( 

1768 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1769 ) 

1770 ) 

1771 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1772 assert all( 

1773 ( 

1774 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1775 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1776 ) 

1777 for d in parent_difference_indices 

1778 ) 

1779 # Now that we've finally got those regions, we'll insert the large ones 

1780 # as tract/patch dimension records. 

1781 skymap_name = "testing_v1" 

1782 registry.insertDimensionData( 

1783 "skymap", 

1784 { 

1785 "name": skymap_name, 

1786 "hash": bytes([42]), 

1787 "tract_max": 1, 

1788 "patch_nx_max": 2, 

1789 "patch_ny_max": 2, 

1790 }, 

1791 ) 

1792 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

1793 registry.insertDimensionData( 

1794 "patch", 

1795 *[ 

1796 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1797 for n, c in enumerate(child_regions_large) 

1798 ], 

1799 ) 

1800 # Add a dataset that uses these dimensions to make sure that modifying 

1801 # them doesn't disrupt foreign keys (need to make sure DB doesn't 

1802 # implement insert with replace=True as delete-then-insert). 

1803 dataset_type = DatasetType( 

1804 "coadd", 

1805 dimensions=["tract", "patch"], 

1806 universe=registry.dimensions, 

1807 storageClass="Exposure", 

1808 ) 

1809 registry.registerDatasetType(dataset_type) 

1810 registry.registerCollection("the_run", CollectionType.RUN) 

1811 registry.insertDatasets( 

1812 dataset_type, 

1813 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

1814 run="the_run", 

1815 ) 

1816 # Query for tracts and patches that overlap some "difference" htm9 

1817 # pixels; there should be overlaps, because the database has 

1818 # the "large" suite of regions. 

1819 self.assertEqual( 

1820 {0}, 

1821 { 

1822 data_id["tract"] 

1823 for data_id in registry.queryDataIds( 

1824 ["tract"], 

1825 skymap=skymap_name, 

1826 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1827 ) 

1828 }, 

1829 ) 

1830 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1831 self.assertIn( 

1832 patch_id, 

1833 { 

1834 data_id["patch"] 

1835 for data_id in registry.queryDataIds( 

1836 ["patch"], 

1837 skymap=skymap_name, 

1838 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1839 ) 

1840 }, 

1841 ) 

1842 # Use syncDimensionData to update the tract region and insertDimensionData 

1843 # with replace=True to update the patch regions to the "small" suite. 

1844 updated = registry.syncDimensionData( 

1845 "tract", 

1846 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

1847 update=True, 

1848 ) 

1849 self.assertEqual(updated, {"region": parent_region_large}) 

1850 registry.insertDimensionData( 

1851 "patch", 

1852 *[ 

1853 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1854 for n, c in enumerate(child_regions_small) 

1855 ], 

1856 replace=True, 

1857 ) 

1858 # Query again; there should now be no such overlaps, because the 

1859 # database has the "small" suite of regions. 

1860 self.assertFalse( 

1861 set( 

1862 registry.queryDataIds( 

1863 ["tract"], 

1864 skymap=skymap_name, 

1865 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1866 ) 

1867 ) 

1868 ) 

1869 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1870 self.assertNotIn( 

1871 patch_id, 

1872 { 

1873 data_id["patch"] 

1874 for data_id in registry.queryDataIds( 

1875 ["patch"], 

1876 skymap=skymap_name, 

1877 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1878 ) 

1879 }, 

1880 ) 

1881 # Update back to the large regions and query one more time. 

1882 updated = registry.syncDimensionData( 

1883 "tract", 

1884 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

1885 update=True, 

1886 ) 

1887 self.assertEqual(updated, {"region": parent_region_small}) 

1888 registry.insertDimensionData( 

1889 "patch", 

1890 *[ 

1891 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1892 for n, c in enumerate(child_regions_large) 

1893 ], 

1894 replace=True, 

1895 ) 

1896 self.assertEqual( 

1897 {0}, 

1898 { 

1899 data_id["tract"] 

1900 for data_id in registry.queryDataIds( 

1901 ["tract"], 

1902 skymap=skymap_name, 

1903 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1904 ) 

1905 }, 

1906 ) 

1907 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1908 self.assertIn( 

1909 patch_id, 

1910 { 

1911 data_id["patch"] 

1912 for data_id in registry.queryDataIds( 

1913 ["patch"], 

1914 skymap=skymap_name, 

1915 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1916 ) 

1917 }, 

1918 ) 

1919 
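# The index arithmetic above in brief: the four HTM children of trixel
# ``i`` at the next level are ``4*i .. 4*i + 3``, which is exactly what
# RangeSet.scaled(4) produces. A short sketch, reusing the
# unpack_range_set helper defined inside the test:
#
#     parent = 12288
#     children = list(unpack_range_set(lsst.sphgeom.RangeSet(parent).scaled(4)))
#     assert children == [4 * parent + k for k in range(4)]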

1920 def testCalibrationCollections(self): 

1921 """Test operations on `~CollectionType.CALIBRATION` collections, 

1922 including `Registry.certify`, `Registry.decertify`, and 

1923 `Registry.findDataset`. 

1924 """ 

1925 # Setup - make a Registry, fill it with some datasets in 

1926 # non-calibration collections. 

1927 registry = self.makeRegistry() 

1928 self.loadData(registry, "base.yaml") 

1929 self.loadData(registry, "datasets.yaml") 

1930 # Set up some timestamps. 

1931 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

1932 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

1933 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

1934 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

1935 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

1936 allTimespans = [ 

1937 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

1938 ] 

1939 # Get references to some datasets. 

1940 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

1941 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

1942 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

1943 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

1944 # Register the main calibration collection we'll be working with. 

1945 collection = "Cam1/calibs/default" 

1946 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

1947 # Cannot associate into a calibration collection (no timespan). 

1948 with self.assertRaises(CollectionTypeError): 

1949 registry.associate(collection, [bias2a]) 

1950 # Certify 2a dataset with [t2, t4) validity. 

1951 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

1952 # Test that we can query for this dataset via the new collection, both 

1953 # on its own and with a RUN collection, as long as we don't try to join 

1954 # in temporal dimensions or use findFirst=True. 

1955 self.assertEqual( 

1956 set(registry.queryDatasets("bias", findFirst=False, collections=collection)), 

1957 {bias2a}, 

1958 ) 

1959 self.assertEqual( 

1960 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])), 

1961 { 

1962 bias2a, 

1963 bias2b, 

1964 bias3b, 

1965 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1966 }, 

1967 ) 

1968 self.assertEqual( 

1969 set(registry.queryDataIds("detector", datasets="bias", collections=collection)), 

1970 {registry.expandDataId(instrument="Cam1", detector=2)}, 

1971 ) 

1972 self.assertEqual( 

1973 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])), 

1974 { 

1975 registry.expandDataId(instrument="Cam1", detector=2), 

1976 registry.expandDataId(instrument="Cam1", detector=3), 

1977 registry.expandDataId(instrument="Cam1", detector=4), 

1978 }, 

1979 ) 

1980 

1981 # We should not be able to certify 2b with anything overlapping that 

1982 # window. 

1983 with self.assertRaises(ConflictingDefinitionError): 

1984 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

1985 with self.assertRaises(ConflictingDefinitionError): 

1986 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

1987 with self.assertRaises(ConflictingDefinitionError): 

1988 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

1989 with self.assertRaises(ConflictingDefinitionError): 

1990 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

1991 with self.assertRaises(ConflictingDefinitionError): 

1992 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

1993 with self.assertRaises(ConflictingDefinitionError): 

1994 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

1995 with self.assertRaises(ConflictingDefinitionError): 

1996 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

1997 with self.assertRaises(ConflictingDefinitionError): 

1998 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

1999 # We should be able to certify 3a with a range overlapping that window, 

2000 # because it's for a different detector. 

2001 # We'll certify 3a over [t1, t3). 

2002 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

2003 # Now we'll certify 2b and 3b together over [t4, ∞). 

2004 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

2005 

2006 # Fetch all associations and check that they are what we expect. 

2007 self.assertCountEqual( 

2008 list( 

2009 registry.queryDatasetAssociations( 

2010 "bias", 

2011 collections=[collection, "imported_g", "imported_r"], 

2012 ) 

2013 ), 

2014 [ 

2015 DatasetAssociation( 

2016 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

2017 collection="imported_g", 

2018 timespan=None, 

2019 ), 

2020 DatasetAssociation( 

2021 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2022 collection="imported_r", 

2023 timespan=None, 

2024 ), 

2025 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

2026 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

2027 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

2028 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

2029 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

2030 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

2031 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2032 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2033 ], 

2034 ) 

2035 

2036 class Ambiguous: 

2037 """Tag class to denote lookups that should be ambiguous.""" 

2038 

2039 pass 

2040 

2041 def assertLookup( 

2042 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]] 

2043 ) -> None: 

2044 """Local function that asserts that a bias lookup returns the given 

2045 expected result. 

2046 """ 

2047 if expected is Ambiguous: 

2048 with self.assertRaises(RuntimeError): 

2049 registry.findDataset( 

2050 "bias", 

2051 collections=collection, 

2052 instrument="Cam1", 

2053 detector=detector, 

2054 timespan=timespan, 

2055 ) 

2056 else: 

2057 self.assertEqual( 

2058 expected, 

2059 registry.findDataset( 

2060 "bias", 

2061 collections=collection, 

2062 instrument="Cam1", 

2063 detector=detector, 

2064 timespan=timespan, 

2065 ), 

2066 ) 

2067 

2068 # Systematically test lookups against expected results. 

2069 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2070 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2071 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2072 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2073 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

2074 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2075 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2076 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2077 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2078 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

2079 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2080 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2081 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2082 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

2083 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2084 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

2085 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

2086 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

2087 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

2088 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2089 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2090 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2091 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2092 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2093 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2094 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

2095 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2096 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2097 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2098 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2099 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

2100 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2101 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2102 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2103 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2104 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2105 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2106 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2107 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2108 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2109 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2110 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2111 

2112 # Decertify [t3, t5) for all data IDs, and run the test lookups again. 

2113 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2114 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2115 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2116 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2117 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2118 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2119 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2120 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2121 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2122 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2123 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2124 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2125 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2126 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2127 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2128 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2129 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2130 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2131 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2132 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2133 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2134 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2135 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2136 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2137 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2138 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2139 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2140 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2141 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2142 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2143 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2144 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2145 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2146 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2147 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2148 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2149 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2150 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2151 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2152 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2153 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2154 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2155 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2156 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2157 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2158 

2159 # Decertify everything, this time with explicit data IDs, then check 

2160 # that no lookups succeed. 

2161 registry.decertify( 

2162 collection, 

2163 "bias", 

2164 Timespan(None, None), 

2165 dataIds=[ 

2166 dict(instrument="Cam1", detector=2), 

2167 dict(instrument="Cam1", detector=3), 

2168 ], 

2169 ) 

2170 for detector in (2, 3): 

2171 for timespan in allTimespans: 

2172 assertLookup(detector=detector, timespan=timespan, expected=None) 

2173 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2174 # those. 

2175 registry.certify( 

2176 collection, 

2177 [bias2a, bias3a], 

2178 Timespan(None, None), 

2179 ) 

2180 for timespan in allTimespans: 

2181 assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2182 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2183 # Decertify just bias2 over [t2, t4). 

2184 # This should split a single certification row into two (and leave the 

2185 # other existing row, for bias3a, alone). 

2186 registry.decertify( 

2187 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2188 ) 

2189 for timespan in allTimespans: 

2190 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2191 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2192 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2193 if overlapsBefore and overlapsAfter: 

2194 expected = Ambiguous 

2195 elif overlapsBefore or overlapsAfter: 

2196 expected = bias2a 

2197 else: 

2198 expected = None 

2199 assertLookup(detector=2, timespan=timespan, expected=expected) 

2200 
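# A pure-Python sketch (hypothetical names) of the lookup rule all of
# these assertions encode: a query timespan overlapping exactly one
# certified validity range yields that dataset, zero yields None, and more
# than one is the ambiguous case, in which findDataset raises
# RuntimeError:
#
#     def expected_lookup(query_timespan, certified):
#         # certified: list of (validity Timespan, DatasetRef) pairs.
#         matches = [ref for valid, ref in certified if valid.overlaps(query_timespan)]
#         if not matches:
#             return None
#         if len(matches) > 1:
#             raise RuntimeError("ambiguous calibration lookup")
#         return matches[0]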

2201 def testSkipCalibs(self): 

2202 """Test how queries handle skipping of calibration collections.""" 

2203 registry = self.makeRegistry() 

2204 self.loadData(registry, "base.yaml") 

2205 self.loadData(registry, "datasets.yaml") 

2206 

2207 coll_calib = "Cam1/calibs/default" 

2208 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2209 

2210 # Add all biases to the calibration collection. 

2211 # Without this, the logic that prunes dataset subqueries based on 

2212 # datasetType-collection summary information will fire before the logic 

2213 # we want to test below. This is a good thing (it avoids the dreaded 

2214 # NotImplementedError a bit more often) everywhere but here. 

2215 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2216 

2217 coll_list = [coll_calib, "imported_g", "imported_r"] 

2218 chain = "Cam1/chain" 

2219 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2220 registry.setCollectionChain(chain, coll_list) 

2221 

2222 # explicit list will raise if findFirst=True or there are temporal 

2223 # dimensions 

2224 with self.assertRaises(NotImplementedError): 

2225 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2226 with self.assertRaises(NotImplementedError): 

2227 registry.queryDataIds( 

2228 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2229 ).count() 

2230 

2231 # chain will skip 

2232 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2233 self.assertGreater(len(datasets), 0) 

2234 

2235 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2236 self.assertGreater(len(dataIds), 0) 

2237 

2238 # glob will skip too 

2239 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2240 self.assertGreater(len(datasets), 0) 

2241 

2242 # regular expression will skip too 

2243 pattern = re.compile(".*") 

2244 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2245 self.assertGreater(len(datasets), 0) 

2246 

2247 # ellipsis should work as usual 

2248 datasets = list(registry.queryDatasets("bias", collections=...)) 

2249 self.assertGreater(len(datasets), 0) 

2250 

2251 # a few tests with findFirst 

2252 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2253 self.assertGreater(len(datasets), 0) 

2254 

2255 def testIngestTimeQuery(self): 

2256 registry = self.makeRegistry() 

2257 self.loadData(registry, "base.yaml") 

2258 dt0 = datetime.utcnow() 

2259 self.loadData(registry, "datasets.yaml") 

2260 dt1 = datetime.utcnow() 

2261 

2262 datasets = list(registry.queryDatasets(..., collections=...)) 

2263 len0 = len(datasets) 

2264 self.assertGreater(len0, 0) 

2265 

2266 where = "ingest_date > T'2000-01-01'" 

2267 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2268 len1 = len(datasets) 

2269 self.assertEqual(len0, len1) 

2270 

2271 # no one will ever use this piece of software in 30 years 

2272 where = "ingest_date > T'2050-01-01'" 

2273 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2274 len2 = len(datasets) 

2275 self.assertEqual(len2, 0) 

2276 

2277 # Check more exact timing to make sure there is no 37-second offset 

2278 # (after fixing DM-30124). SQLite time precision is 1 second, so make 

2279 # sure that we don't test with higher precision. 

2280 tests = [ 

2281 # format: (timestamp, operator, expected_len) 

2282 (dt0 - timedelta(seconds=1), ">", len0), 

2283 (dt0 - timedelta(seconds=1), "<", 0), 

2284 (dt1 + timedelta(seconds=1), "<", len0), 

2285 (dt1 + timedelta(seconds=1), ">", 0), 

2286 ] 

2287 for dt, op, expect_len in tests: 

2288 dt_str = dt.isoformat(sep=" ") 

2289 

2290 where = f"ingest_date {op} T'{dt_str}'" 

2291 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2292 self.assertEqual(len(datasets), expect_len) 

2293 

2294 # same with bind using datetime or astropy Time 

2295 where = f"ingest_date {op} ingest_time" 

2296 datasets = list( 

2297 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2298 ) 

2299 self.assertEqual(len(datasets), expect_len) 

2300 

2301 dt_astropy = astropy.time.Time(dt, format="datetime") 

2302 datasets = list( 

2303 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2304 ) 

2305 self.assertEqual(len(datasets), expect_len) 

2306 
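# The two equivalent ways this test constrains on ingest_date, side by
# side: an inline T'...' literal, or a bind value, which avoids the string
# formatting (``dt`` is a datetime as above):
#
#     where = f"ingest_date > T'{dt.isoformat(sep=' ')}'"
#     # ...or, with a bind parameter:
#     where = "ingest_date > t0"
#     datasets = list(registry.queryDatasets(..., collections=..., where=where, bind={"t0": dt}))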

2307 def testTimespanQueries(self): 

2308 """Test query expressions involving timespans.""" 

2309 registry = self.makeRegistry() 

2310 self.loadData(registry, "hsc-rc2-subset.yaml") 

2311 # All visits in the database; mapping from ID to timespan. 

2312 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2313 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2314 # visit IDs are monotonically increasing). 

2315 ids = sorted(visits.keys()) 

2316 self.assertGreater(len(ids), 20) 

2317 # Pick some quasi-random indexes into `ids` to play with. 

2318 i1 = int(len(ids) * 0.1) 

2319 i2 = int(len(ids) * 0.3) 

2320 i3 = int(len(ids) * 0.6) 

2321 i4 = int(len(ids) * 0.8) 

2322 # Extract some times from those: just before the beginning of i1 (which 

2323 # should be after the end of the visit before), exactly the 

2324 # beginning of i2, just after the beginning of i3 (and before its end), 

2325 # and the exact end of i4. 

2326 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2327 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2328 t2 = visits[ids[i2]].begin 

2329 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2330 self.assertLess(t3, visits[ids[i3]].end) 

2331 t4 = visits[ids[i4]].end 

2332 # Make sure those are actually in order. 

2333 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2334 

2335 bind = { 

2336 "t1": t1, 

2337 "t2": t2, 

2338 "t3": t3, 

2339 "t4": t4, 

2340 "ts23": Timespan(t2, t3), 

2341 } 

2342 

2343 def query(where): 

2344 """Helper function that queries for visit data IDs and returns 

2345 results as a sorted, deduplicated list of visit IDs. 

2346 """ 

2347 return sorted( 

2348 { 

2349 dataId["visit"] 

2350 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2351 } 

2352 ) 

2353 

2354 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2355 # where they appear in the expression, and how we get the timespan into 

2356 # the expression. 

2357 

2358 # t1 is before the start of i1, so this should not include i1. 

2359 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2360 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2361 # should not include i2. 

2362 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2363 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2364 # t3 is in the middle of i3, so this should include i3. 

2365 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2366 # This one should not include i3 by the same reasoning. 

2367 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2368 # t4 is exactly at the end of i4, so this should include i4. 

2369 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2370 # i4's upper bound of t4 is exclusive so this should not include i4. 

2371 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2372 

2373 # Now some timespan vs. time scalar queries. 

2374 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2375 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2376 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2377 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2378 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2379 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2380 

2381 # Empty timespans should not overlap anything. 

2382 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 

2383 
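# The half-open [begin, end) semantics the assertions above rely on, in
# miniature, using the bound times from this test:
#
#     ts23 = Timespan(t2, t3)
#     assert ts23.overlaps(Timespan(t2, t4))      # shares [t2, t3)
#     assert not ts23.overlaps(Timespan(t3, t4))  # end bounds are exclusive
#     assert Timespan(None, t2).overlaps(Timespan(t1, t3))  # since t1 < t2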

2384 def testCollectionSummaries(self): 

2385 """Test recording and retrieval of collection summaries.""" 

2386 self.maxDiff = None 

2387 registry = self.makeRegistry() 

2388 # Importing datasets from yaml should go through the code path where 

2389 # we update collection summaries as we insert datasets. 

2390 self.loadData(registry, "base.yaml") 

2391 self.loadData(registry, "datasets.yaml") 

2392 flat = registry.getDatasetType("flat") 

2393 expected1 = CollectionSummary() 

2394 expected1.dataset_types.add(registry.getDatasetType("bias")) 

2395 expected1.add_data_ids( 

2396 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)] 

2397 ) 

2398 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2399 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2400 # Create a chained collection with both of the imported runs; the 

2401 # summary should be the same, because it's a union with itself. 

2402 chain = "chain" 

2403 registry.registerCollection(chain, CollectionType.CHAINED) 

2404 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2405 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2406 # Associate flats only into a tagged collection and a calibration 

2407 # collection to check summaries of those. 

2408 tag = "tag" 

2409 registry.registerCollection(tag, CollectionType.TAGGED) 

2410 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2411 calibs = "calibs" 

2412 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2413 registry.certify( 

2414 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2415 ) 

2416 expected2 = expected1.copy() 

2417 expected2.dataset_types.discard("bias") 

2418 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2419 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2420 # Explicitly calling Registry.refresh() should load those same 

2421 # summaries, via a totally different code path. 

2422 registry.refresh() 

2423 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2424 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2425 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2426 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2427 
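# For reference, a hedged sketch of how the summaries tested below might be
# used to cheaply pre-filter collections; the `names` attribute is assumed to
# behave like the usual NamedValueSet attribute.
#
#     summary = registry.getCollectionSummary("imported_g")
#     if "bias" in summary.dataset_types.names:
#         # Summaries are conservative: a hit only means the collection *may*
#         # contain biases, so a real query is still needed.
#         refs = list(registry.queryDatasets("bias", collections=["imported_g"]))
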

2428 def testBindInQueryDatasets(self): 

2429 """Test that the bind parameter is correctly forwarded in 

2430 queryDatasets recursion. 

2431 """ 

2432 registry = self.makeRegistry() 

2433 # Load base definitions and some datasets so there is something to 

2434 # query against. 

2435 self.loadData(registry, "base.yaml") 

2436 self.loadData(registry, "datasets.yaml") 

2437 self.assertEqual( 

2438 set(registry.queryDatasets("flat", band="r", collections=...)), 

2439 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2440 ) 

2441 
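# For reference, a hedged sketch expanding on the bind usage above; binding
# values avoids hand-quoting them into the `where` string. The specific
# values here are hypothetical.
#
#     refs = registry.queryDatasets(
#         "flat",
#         where="band = my_band AND instrument = instr",
#         bind={"my_band": "r", "instr": "Cam1"},
#         collections=...,
#     )
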

2442 def testQueryResultSummaries(self): 

2443 """Test summary methods like `count`, `any`, and `explain_no_results` 

2444 on `DataCoordinateQueryResults` and `DatasetQueryResults`. 

2445 """ 

2446 registry = self.makeRegistry() 

2447 self.loadData(registry, "base.yaml") 

2448 self.loadData(registry, "datasets.yaml") 

2449 self.loadData(registry, "spatial.yaml") 

2450 # The default test data has two collections, each containing both 

2451 # flats and biases. Add a new collection with only biases. 

2452 registry.registerCollection("biases", CollectionType.TAGGED) 

2453 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2454 # First query yields two results, and involves no postprocessing. 

2455 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2456 self.assertTrue(query1.any(execute=False, exact=False)) 

2457 self.assertTrue(query1.any(execute=True, exact=False)) 

2458 self.assertTrue(query1.any(execute=True, exact=True)) 

2459 self.assertEqual(query1.count(exact=False), 2) 

2460 self.assertEqual(query1.count(exact=True), 2) 

2461 self.assertFalse(list(query1.explain_no_results())) 

2462 # Second query should yield no results, but this isn't detectable 

2463 # unless we actually run a query. 

2464 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2465 self.assertTrue(query2.any(execute=False, exact=False)) 

2466 self.assertFalse(query2.any(execute=True, exact=False)) 

2467 self.assertFalse(query2.any(execute=True, exact=True)) 

2468 self.assertEqual(query2.count(exact=False), 0) 

2469 self.assertEqual(query2.count(exact=True), 0) 

2470 self.assertFalse(list(query2.explain_no_results())) 
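# The assertions above exercise the intended cheap-to-expensive ladder for
# result summaries; a hedged sketch of the same pattern in user code:
#
#     query = registry.queryDataIds(["physical_filter"], band="r")
#     if not query.any(execute=False, exact=False):
#         pass  # provably doomed without ever executing SQL
#     elif not query.any(execute=True, exact=True):
#         print("\n".join(query.explain_no_results()))
#     else:
#         n = query.count(exact=False)  # fast, but may overcount duplicates
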

2471 # These queries yield no results due to various problems that can be 

2472 # spotted prior to execution, yielding helpful diagnostics. 

2473 base_query = registry.queryDataIds(["detector", "physical_filter"]) 

2474 queries_and_snippets = [ 

2475 ( 

2476 # Dataset type name doesn't match any existing dataset types. 

2477 registry.queryDatasets("nonexistent", collections=...), 

2478 ["nonexistent"], 

2479 ), 

2480 ( 

2481 # Dataset type object isn't registered. 

2482 registry.queryDatasets( 

2483 DatasetType( 

2484 "nonexistent", 

2485 dimensions=["instrument"], 

2486 universe=registry.dimensions, 

2487 storageClass="Image", 

2488 ), 

2489 collections=..., 

2490 ), 

2491 ["nonexistent"], 

2492 ), 

2493 ( 

2494 # No datasets of this type in this collection. 

2495 registry.queryDatasets("flat", collections=["biases"]), 

2496 ["flat", "biases"], 

2497 ), 

2498 ( 

2499 # No datasets of this type in this collection. 

2500 base_query.findDatasets("flat", collections=["biases"]), 

2501 ["flat", "biases"], 

2502 ), 

2503 ( 

2504 # No collections matching at all. 

2505 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2506 ["potato"], 

2507 ), 

2508 ] 

2509 # The behavior of these additional queries is slated to change in the 

2510 # future, so we also check for deprecation warnings. 

2511 with self.assertWarns(FutureWarning): 

2512 queries_and_snippets.append( 

2513 ( 

2514 # Dataset type name doesn't match any existing dataset 

2515 # types. 

2516 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...), 

2517 ["nonexistent"], 

2518 ) 

2519 ) 

2520 with self.assertWarns(FutureWarning): 

2521 queries_and_snippets.append( 

2522 ( 

2523 # Dataset type name doesn't match any existing dataset 

2524 # types. 

2525 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...), 

2526 ["nonexistent"], 

2527 ) 

2528 ) 

2529 for query, snippets in queries_and_snippets: 

2530 self.assertFalse(query.any(execute=False, exact=False)) 

2531 self.assertFalse(query.any(execute=True, exact=False)) 

2532 self.assertFalse(query.any(execute=True, exact=True)) 

2533 self.assertEqual(query.count(exact=False), 0) 

2534 self.assertEqual(query.count(exact=True), 0) 

2535 messages = list(query.explain_no_results()) 

2536 self.assertTrue(messages) 

2537 # Want all expected snippets to appear in at least one message. 

2538 self.assertTrue( 

2539 any( 

2540 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2541 ), 

2542 messages, 

2543 ) 

2544 

2545 # This query does yield results, but it should also emit a warning, 

2546 # because passing dataset type patterns to queryDataIds is deprecated; 

2547 # here we just check for the warning. 

2548 with self.assertWarns(FutureWarning): 

2549 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...) 

2550 

2551 # These queries yield no results due to problems that can be identified 

2552 # by cheap follow-up queries, yielding helpful diagnostics. 

2553 for query, snippets in [ 

2554 ( 

2555 # No records for one of the involved dimensions. 

2556 registry.queryDataIds(["subfilter"]), 

2557 ["dimension records", "subfilter"], 

2558 ), 

2559 ( 

2560 # No records for one of the involved dimensions. 

2561 registry.queryDimensionRecords("subfilter"), 

2562 ["dimension records", "subfilter"], 

2563 ), 

2564 ]: 

2565 self.assertFalse(query.any(execute=True, exact=False)) 

2566 self.assertFalse(query.any(execute=True, exact=True)) 

2567 self.assertEqual(query.count(exact=True), 0) 

2568 messages = list(query.explain_no_results()) 

2569 self.assertTrue(messages) 

2570 # Want all expected snippets to appear in at least one message. 

2571 self.assertTrue( 

2572 any( 

2573 all(snippet in message for snippet in snippets) for message in query.explain_no_results() 

2574 ), 

2575 messages, 

2576 ) 

2577 
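# For reference, a hedged sketch of surfacing these follow-up diagnostics to
# users rather than asserting on them; `logging` is already imported by this
# module.
#
#     query = registry.queryDimensionRecords("subfilter")
#     for message in query.explain_no_results():
#         logging.warning("Query is doomed: %s", message)
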

2578 # This query yields four overlaps in the database, but one is filtered 

2579 # out in postprocessing. The count queries aren't accurate because 

2580 # they don't account for duplication that happens due to an internal 

2581 # join against commonSkyPix. 

2582 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2583 self.assertEqual( 

2584 { 

2585 DataCoordinate.standardize( 

2586 instrument="Cam1", 

2587 skymap="SkyMap1", 

2588 visit=v, 

2589 tract=t, 

2590 universe=registry.dimensions, 

2591 ) 

2592 for v, t in [(1, 0), (2, 0), (2, 1)] 

2593 }, 

2594 set(query3), 

2595 ) 

2596 self.assertTrue(query3.any(execute=False, exact=False)) 

2597 self.assertTrue(query3.any(execute=True, exact=False)) 

2598 self.assertTrue(query3.any(execute=True, exact=True)) 

2599 self.assertGreaterEqual(query3.count(exact=False), 4) 

2600 self.assertGreaterEqual(query3.count(exact=True), 3) 

2601 self.assertFalse(list(query3.explain_no_results())) 

2602 # This query yields overlaps in the database, but all are filtered 

2603 # out in postprocessing. The count queries again aren't very useful. 

2604 # We have to use `where=` here to avoid an optimization that 

2605 # (currently) skips the spatial postprocess-filtering because it 

2606 # recognizes that no spatial join is necessary. That's not ideal, but 

2607 # fixing it is out of scope for this ticket. 

2608 query4 = registry.queryDataIds( 

2609 ["visit", "tract"], 

2610 instrument="Cam1", 

2611 skymap="SkyMap1", 

2612 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2613 ) 

2614 self.assertFalse(set(query4)) 

2615 self.assertTrue(query4.any(execute=False, exact=False)) 

2616 self.assertTrue(query4.any(execute=True, exact=False)) 

2617 self.assertFalse(query4.any(execute=True, exact=True)) 

2618 self.assertGreaterEqual(query4.count(exact=False), 1) 

2619 self.assertEqual(query4.count(exact=True), 0) 

2620 messages = list(query4.explain_no_results()) 

2621 self.assertTrue(messages) 

2622 self.assertTrue(any("regions did not overlap" in message for message in messages)) 

2623 
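# For reference, a hedged sketch of inspecting the regions behind the spatial
# postprocess filtering above; `visit_detector_region` is assumed to be the
# dimension element in the default universe whose records carry a `region`.
#
#     for rec in registry.queryDimensionRecords("visit_detector_region", instrument="Cam1"):
#         print(rec.dataId, rec.region)
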

2624 # There are also cases where queries return empty results that we do 

2625 # not yet know how to explain. 

2626 query5 = registry.queryDimensionRecords( 

2627 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1" 

2628 ) 

2629 self.assertEqual(query5.count(exact=True), 0) 

2630 messages = list(query5.explain_no_results()) 

2631 self.assertFalse(messages) 

2632 # This query should yield results from one dataset type but not the 

2633 # other, which is not registered. 

2634 query6 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"]) 

2635 self.assertTrue(set(query6)) 

2636 self.assertTrue(query6.any(execute=False, exact=False)) 

2637 self.assertTrue(query6.any(execute=True, exact=False)) 

2638 self.assertTrue(query6.any(execute=True, exact=True)) 

2639 self.assertGreaterEqual(query6.count(exact=False), 1) 

2640 self.assertGreaterEqual(query6.count(exact=True), 1) 

2641 self.assertFalse(list(query6.explain_no_results())) 

2642 

2643 def testQueryDataIdsOrderBy(self): 

2644 """Test order_by and limit on result returned by queryDataIds().""" 

2645 registry = self.makeRegistry() 

2646 self.loadData(registry, "base.yaml") 

2647 self.loadData(registry, "datasets.yaml") 

2648 self.loadData(registry, "spatial.yaml") 

2649 

2650 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

2651 return registry.queryDataIds( 

2652 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

2653 ) 

2654 

2655 Test = namedtuple( 

2656 "testQueryDataIdsOrderByTest", 

2657 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

2658 defaults=(None, None, None), 

2659 ) 

2660 

2661 test_data = ( 

2662 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2663 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

2664 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

2665 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

2666 Test( 

2667 "tract.id,visit.id", 

2668 "tract,visit", 

2669 ((0, 1), (0, 1), (0, 2)), 

2670 limit=(3,), 

2671 ), 

2672 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

2673 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

2674 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

2675 Test( 

2676 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

2677 ), 

2678 Test( 

2679 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

2680 ), 

2681 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2682 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2683 Test( 

2684 "tract,-timespan.begin,timespan.end", 

2685 "tract,visit", 

2686 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

2687 ), 

2688 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

2689 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

2690 Test( 

2691 "tract,detector", 

2692 "tract,detector", 

2693 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2694 datasets="flat", 

2695 collections="imported_r", 

2696 ), 

2697 Test( 

2698 "tract,detector.full_name", 

2699 "tract,detector", 

2700 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2701 datasets="flat", 

2702 collections="imported_r", 

2703 ), 

2704 Test( 

2705 "tract,detector.raft,detector.name_in_raft", 

2706 "tract,detector", 

2707 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2708 datasets="flat", 

2709 collections="imported_r", 

2710 ), 

2711 ) 

2712 
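# For reference, a hedged sketch of the order_by/limit grammar driven by the
# table above: keys may be dimension names, optionally prefixed with "-" for
# descending order, or dotted metadata/timespan fields.
#
#     data_ids = (
#         registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
#         .order_by("tract", "-visit.exposure_time")
#         .limit(3)
#     )
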

2713 for test in test_data: 

2714 order_by = test.order_by.split(",") 

2715 keys = test.keys.split(",") 

2716 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

2717 if test.limit is not None: 

2718 query = query.limit(*test.limit) 

2719 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

2720 self.assertEqual(dataIds, test.result) 

2721 

2722 # Repeat the query with materialization. 

2723 query = do_query(keys).order_by(*order_by) 

2724 if test.limit is not None: 

2725 query = query.limit(*test.limit) 

2726 with query.materialize() as materialized: 

2727 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in materialized) 

2728 self.assertEqual(dataIds, test.result) 

2729 

2730 # Errors in ORDER BY names. 

2731 for order_by in ("", "-"): 

2732 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2733 list(do_query().order_by(order_by)) 

2734 

2735 for order_by in ("undimension.name", "-undimension.name"): 

2736 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"): 

2737 list(do_query().order_by(order_by)) 

2738 

2739 for order_by in ("attract", "-attract"): 

2740 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

2741 list(do_query().order_by(order_by)) 

2742 

2743 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

2744 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

2745 

2746 with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimesion"): 

2747 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

2748 

2749 with self.assertRaisesRegex( 

2750 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

2751 ): 

2752 list(do_query(("tract")).order_by("timespan.begin")) 

2753 

2754 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

2755 list(do_query(("tract")).order_by("tract.timespan.begin")) 

2756 

2757 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

2758 list(do_query(("tract")).order_by("tract.name")) 

2759 

2760 def testQueryDataIdsGovernorExceptions(self): 

2761 """Test exceptions raised by queryDataIds() for incorrect governors.""" 

2762 registry = self.makeRegistry() 

2763 self.loadData(registry, "base.yaml") 

2764 self.loadData(registry, "datasets.yaml") 

2765 self.loadData(registry, "spatial.yaml") 

2766 

2767 def do_query(dimensions, dataId=None, where=None, bind=None, **kwargs): 

2768 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs) 

2769 

2770 Test = namedtuple( 

2771 "testQueryDataIdExceptionsTest", 

2772 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"), 

2773 defaults=(None, None, None, {}, None, 0), 

2774 ) 

2775 

2776 test_data = ( 

2777 Test("tract,visit", count=6), 

2778 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2779 Test( 

2780 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError 

2781 ), 

2782 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2783 Test( 

2784 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError 

2785 ), 

2786 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6), 

2787 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError), 

2788 Test( 

2789 "tract,visit", 

2790 where="instrument=cam AND skymap=map", 

2791 bind={"cam": "Cam1", "map": "SkyMap1"}, 

2792 count=6, 

2793 ), 

2794 Test( 

2795 "tract,visit", 

2796 where="instrument=cam AND skymap=map", 

2797 bind={"cam": "Cam", "map": "SkyMap"}, 

2798 exception=DataIdValueError, 

2799 ), 

2800 ) 

2801 

2802 for test in test_data: 

2803 dimensions = test.dimensions.split(",") 

2804 if test.exception: 

2805 with self.assertRaises(test.exception): 

2806 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() 

2807 else: 

2808 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2809 self.assertEqual(query.count(), test.count) 

2810 

2811 # Repeat the query with materialization. 

2812 if test.exception: 

2813 with self.assertRaises(test.exception): 

2814 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2815 with query.materialize() as materialized: 

2816 materialized.count() 

2817 else: 

2818 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2819 with query.materialize() as materialized: 

2820 self.assertEqual(materialized.count(), test.count) 

2821 
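# For reference, a hedged sketch of handling the governor-dimension failures
# exercised above; DataIdValueError is assumed to be re-exported from
# lsst.daf.butler.registry, matching this module's relative import.
#
#     from lsst.daf.butler.registry import DataIdValueError
#
#     try:
#         registry.queryDataIds(["tract", "visit"], instrument="Cam2", skymap="SkyMap1").count()
#     except DataIdValueError as err:
#         print(f"Unknown governor value: {err}")
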

2822 def testQueryDimensionRecordsOrderBy(self): 

2823 """Test order_by and limit on result returned by 

2824 queryDimensionRecords(). 

2825 """ 

2826 registry = self.makeRegistry() 

2827 self.loadData(registry, "base.yaml") 

2828 self.loadData(registry, "datasets.yaml") 

2829 self.loadData(registry, "spatial.yaml") 

2830 

2831 def do_query(element, datasets=None, collections=None): 

2832 return registry.queryDimensionRecords( 

2833 element, instrument="Cam1", datasets=datasets, collections=collections 

2834 ) 

2835 

2836 query = do_query("detector") 

2837 self.assertEqual(len(list(query)), 4) 

2838 

2839 Test = namedtuple( 

2840 "testQueryDataIdsOrderByTest", 

2841 ("element", "order_by", "result", "limit", "datasets", "collections"), 

2842 defaults=(None, None, None), 

2843 ) 

2844 

2845 test_data = ( 

2846 Test("detector", "detector", (1, 2, 3, 4)), 

2847 Test("detector", "-detector", (4, 3, 2, 1)), 

2848 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)), 

2849 Test("detector", "-detector.purpose", (4,), limit=(1,)), 

2850 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)), 

2851 Test("visit", "visit", (1, 2)), 

2852 Test("visit", "-visit.id", (2, 1)), 

2853 Test("visit", "zenith_angle", (1, 2)), 

2854 Test("visit", "-visit.name", (2, 1)), 

2855 Test("visit", "day_obs,-timespan.begin", (2, 1)), 

2856 ) 

2857 

2858 for test in test_data: 

2859 order_by = test.order_by.split(",") 

2860 query = do_query(test.element).order_by(*order_by) 

2861 if test.limit is not None: 

2862 query = query.limit(*test.limit) 

2863 dataIds = tuple(rec.id for rec in query) 

2864 self.assertEqual(dataIds, test.result) 

2865 

2866 # Errors in ORDER BY names. 

2867 for order_by in ("", "-"): 

2868 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2869 list(do_query("detector").order_by(order_by)) 

2870 

2871 for order_by in ("undimension.name", "-undimension.name"): 

2872 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"): 

2873 list(do_query("detector").order_by(order_by)) 

2874 

2875 for order_by in ("attract", "-attract"): 

2876 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."): 

2877 list(do_query("detector").order_by(order_by)) 

2878 

2879 def testQueryDimensionRecordsExceptions(self): 

2880 """Test exceptions raised by queryDimensionRecords().""" 

2881 registry = self.makeRegistry() 

2882 self.loadData(registry, "base.yaml") 

2883 self.loadData(registry, "datasets.yaml") 

2884 self.loadData(registry, "spatial.yaml") 

2885 

2886 result = registry.queryDimensionRecords("detector") 

2887 self.assertEqual(result.count(), 4) 

2888 result = registry.queryDimensionRecords("detector", instrument="Cam1") 

2889 self.assertEqual(result.count(), 4) 

2890 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"}) 

2891 self.assertEqual(result.count(), 4) 

2892 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'") 

2893 self.assertEqual(result.count(), 4) 

2894 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"}) 

2895 self.assertEqual(result.count(), 4) 

2896 

2897 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

2898 result = registry.queryDimensionRecords("detector", instrument="NotCam1") 

2899 result.count() 

2900 

2901 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

2902 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) 

2903 result.count() 

2904 

2905 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

2906 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") 

2907 result.count() 

2908 

2909 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

2910 result = registry.queryDimensionRecords( 

2911 "detector", where="instrument=instr", bind={"instr": "NotCam1"} 

2912 ) 

2913 result.count() 

2914 

2915 def testDatasetConstrainedDimensionRecordQueries(self): 

2916 """Test that queryDimensionRecords works even when given a dataset 

2917 constraint whose dimensions extend beyond the requested dimension 

2918 element's. 

2919 """ 

2920 registry = self.makeRegistry() 

2921 self.loadData(registry, "base.yaml") 

2922 self.loadData(registry, "datasets.yaml") 

2923 # Query for physical_filter dimension records, using a dataset that 

2924 # has both physical_filter and detector among its dimensions. 

2925 records = registry.queryDimensionRecords( 

2926 "physical_filter", 

2927 datasets=["flat"], 

2928 collections="imported_r", 

2929 ) 

2930 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"}) 

2931 # Trying to constrain by all dataset types is an error. 

2932 with self.assertRaises(TypeError): 

2933 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r")) 

2934 

2935 def testSkyPixDatasetQueries(self): 

2936 """Test that we can build queries involving skypix dimensions as long 

2937 as a dataset type that uses those dimensions is included. 

2938 """ 

2939 registry = self.makeRegistry() 

2940 self.loadData(registry, "base.yaml") 

2941 dataset_type = DatasetType( 

2942 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int" 

2943 ) 

2944 registry.registerDatasetType(dataset_type) 

2945 run = "r" 

2946 registry.registerRun(run) 

2947 # First try queries where there are no datasets; the concern is whether 

2948 # we can even build and execute these queries without raising, even 

2949 # when "doomed" query shortcuts are in play. 

2950 self.assertFalse( 

2951 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)) 

2952 ) 

2953 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run))) 

2954 # Now add a dataset and see that we can get it back. 

2955 htm7 = registry.dimensions.skypix["htm"][7].pixelization 

2956 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0]) 

2957 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run) 

2958 self.assertEqual( 

2959 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)), 

2960 {data_id}, 

2961 ) 

2962 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref}) 

2963 
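# For reference, a hedged sketch of working with the skypix pixelization used
# above; `universe()` returns contiguous index ranges, and `pixel()` is
# assumed to return the spherical region for an index, per lsst.sphgeom.
#
#     htm7 = registry.dimensions.skypix["htm"][7].pixelization
#     begin, end = htm7.universe()[0]   # first contiguous range of pixel indices
#     region = htm7.pixel(begin)        # spherical polygon for that pixel
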

2964 def testDatasetIdFactory(self): 

2965 """Simple test for DatasetIdFactory, mostly to catch potential changes 

2966 in its API. 

2967 """ 

2968 registry = self.makeRegistry() 

2969 factory = registry.datasetIdFactory 

2970 dataset_type = DatasetType( 

2971 "datasetType", 

2972 dimensions=["detector", "instrument"], 

2973 universe=registry.dimensions, 

2974 storageClass="int", 

2975 ) 

2976 run = "run" 

2977 data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions) 

2978 

2979 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE) 

2980 self.assertIsInstance(datasetId, uuid.UUID) 

2981 self.assertEqual(datasetId.version, 4) 

2982 

2983 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE) 

2984 self.assertIsInstance(datasetId, uuid.UUID) 

2985 self.assertEqual(datasetId.version, 5) 

2986 

2987 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN) 

2988 self.assertIsInstance(datasetId, uuid.UUID) 

2989 self.assertEqual(datasetId.version, 5)
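
# A short stdlib sketch of why the assertions above expect version 4 vs.
# version 5 UUIDs: UNIQUE IDs are random (uuid4), while the DATAID_TYPE*
# modes are name-based and therefore reproducible (uuid5). The namespace and
# name strings below are illustrative only.
#
#     import uuid
#
#     a = uuid.uuid5(uuid.NAMESPACE_URL, "run/datasetType/Cam1/1")
#     b = uuid.uuid5(uuid.NAMESPACE_URL, "run/datasetType/Cam1/1")
#     assert a == b and a.version == 5   # deterministic for the same name
#     assert uuid.uuid4().version == 4   # random every time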