# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    Timespan,
    ddl,
)
from .._collection_summary import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum

if TYPE_CHECKING:
    from .._registry import Registry

class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

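    # Purely illustrative (the module path here is an assumption, not
    # authoritative): a concrete subclass might override the members above
    # like this, and makeRegistryConfig() below will then inject the values
    # into the registry configuration:
    #
    #     class MyRegistryTests(RegistryTests, unittest.TestCase):
    #         datasetsManager = (
    #             "lsst.daf.butler.registry.datasets.byDimensions."
    #             "ByDimensionsDatasetRecordStorageManagerUUID"
    #         )
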
    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create the `RegistryConfig` used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self, share_repo_with: Optional[Registry] = None) -> Optional[Registry]:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with` is
            not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()

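            # register() creates the collections and dataset types named in
            # the YAML file; load() then inserts the dimension records and
            # datasets themselves.  Passing datastore=None should skip any
            # file transfer, since these tests exercise only the Registry.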
            backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """

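        # count() and any() are checked in addition to iteration because the
        # lazy results objects may implement each with its own (cheaper)
        # query rather than by materializing the full result set.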
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))

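        # Keyword arguments to fetchOpaqueData act as filters that are ANDed
        # together, and sequence values translate into SQL IN clauses, as the
        # assertions below demonstrate.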
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause which exceeds the SQLite limit on the
        # number of parameters.  SQLite says the limit is 32k, but it looks
        # like it is much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size; the first has
        # duplicates, the second has matching elements in different batches
        # (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding the required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
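        # (syncDimensionData is insert-or-verify: it returns True when it
        # inserts a new record, False when a matching record already exists,
        # and raises when an existing record conflicts, as exercised below.)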
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset IDs."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # The UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All the different failure modes
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test the non-unique ID generation modes; such datasets can be
        # re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):

                # Use an integer dataset ID to force UUID calculation in
                # _importDatasets
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                (ref1,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)

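                # Both non-unique modes produce deterministic version-5
                # UUIDs: DATAID_TYPE hashes the dataset type and data ID
                # only, so the same dataset imported into a different run
                # maps to the same UUID, while DATAID_TYPE_RUN also hashes
                # the run name.  The assertions below rely on exactly that
                # difference.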
                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import into a different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "flat", "bias.wcs", "flat.photoCalib"},
                NamedValueSet(registry.queryDatasetTypes(components=True)).names,
            )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
            )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
            )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
            )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={
                "data1": registry.storageClasses.getStorageClass("StructuredDataDict"),
                "data2": registry.storageClasses.getStorageClass("StructuredDataDict"),
            },
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertWarns(FutureWarning):
            with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
                everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        with self.assertWarns(FutureWarning):
            dataIds = registry.queryDataIds(
                ["detector"],
                datasets=["bias.wcs"],
                collections=collection,
            ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        with self.assertWarns(FutureWarning):
            childRefs2 = set(
                registry.queryDatasets(
                    "bias.wcs",
                    collections=collection,
                )
            )
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2}, {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # The chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
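        # With flatten=False the chain stores the CHAINED child itself; with
        # flatten=True any CHAINED children should be recursively replaced by
        # their own children before the chain is stored.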
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)

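        # registry.transaction() commits on clean exit and rolls back when an
        # exception escapes the block, which is what the rest of this test
        # verifies.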
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins
        to skymap.
        """
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # Dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # Add pre-existing datasets.
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2;
                # 100 has different datasets in the different collections,
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dimension string works as well as a list of str.
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # With an empty expression.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
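            # Dimension packers encode a data ID into a single integer and
            # back; the two packers here should round-trip this data ID but
            # produce distinct packed values.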
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions),
            )
            self.assertEqual(
                packer2.unpack(packer2.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer2.dimensions),
            )
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # Second collection.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # With two input collections.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # Limit to a single visit.
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # A more limiting expression, using link names instead of
        # Table.column.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # An expression that excludes everything.
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter; it is not in the dimensions, but it is
        # a part of the full expression, so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, with no joins to
        instrument.
        """
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8")))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # Dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # Add pre-existing datasets.
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # With an empty expression.
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # Limit to 2 tracts and 2 patches.
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # Limit to a single filter.
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # Specifying a non-existing skymap is an exception.
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()

1168 def testSpatialJoin(self): 

1169 """Test queries that involve spatial overlap joins.""" 

1170 registry = self.makeRegistry() 

1171 self.loadData(registry, "hsc-rc2-subset.yaml") 

1172 

1173 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of 

1174 # the TopologicalFamily they belong to. We'll relate all elements in 

1175 # each family to all of the elements in each other family. 

1176 families = defaultdict(set) 

1177 # Dictionary of {element.name: {dataId: region}}. 

1178 regions = {} 

1179 for element in registry.dimensions.getDatabaseElements(): 

1180 if element.spatial is not None: 

1181 families[element.spatial.name].add(element) 

1182 regions[element.name] = { 

1183 record.dataId: record.region for record in registry.queryDimensionRecords(element) 

1184 } 

1185 

1186 # If this check fails, it's not necessarily a problem - it may just be 

1187 # a reasonable change to the default dimension definitions - but the 

1188 # test below depends on there being more than one family to do anything 

1189 # useful. 

1190 self.assertEqual(len(families), 2) 

1191 

1192 # Overlap DatabaseDimensionElements with each other. 

1193 for family1, family2 in itertools.combinations(families, 2): 

1194 for element1, element2 in itertools.product(families[family1], families[family2]): 

1195 graph = DimensionGraph.union(element1.graph, element2.graph) 

1196 # Construct expected set of overlapping data IDs via a 

1197 # brute-force comparison of the regions we've already fetched. 

1198 expected = { 

1199 DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph) 

1200 for (dataId1, region1), (dataId2, region2) in itertools.product( 

1201 regions[element1.name].items(), regions[element2.name].items() 

1202 ) 

1203 if not region1.isDisjointFrom(region2) 

1204 } 

1205 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1206 queried = set(registry.queryDataIds(graph)) 

1207 self.assertEqual(expected, queried) 

1208 

1209 # Overlap each DatabaseDimensionElement with the commonSkyPix system. 

1210 commonSkyPix = registry.dimensions.commonSkyPix 
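# Note: commonSkyPix is the skypix dimension used to mediate spatial
# joins in the database (htm7 in the default dimension configuration,
# though this test does not depend on the particular level).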

1211 for elementName, elementRegions in regions.items(): 

1212 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph) 

1213 expected = set() 

1214 for dataId, region in elementRegions.items(): 

1215 for begin, end in commonSkyPix.pixelization.envelope(region): 

1216 expected.update( 

1217 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph) 

1218 for index in range(begin, end) 

1219 ) 

1220 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1221 queried = set(registry.queryDataIds(graph)) 

1222 self.assertEqual(expected, queried) 

1223 

1224 def testAbstractQuery(self): 

1225 """Test that we can run a query that just lists the known 

1226 bands. This is tricky because band is 

1227 backed by a query against physical_filter. 

1228 """ 

1229 registry = self.makeRegistry() 

1230 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1231 registry.insertDimensionData( 

1232 "physical_filter", 

1233 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1234 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1235 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1236 ) 

1237 rows = registry.queryDataIds(["band"]).toSet() 

1238 self.assertCountEqual( 

1239 rows, 

1240 [ 

1241 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1242 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1243 ], 

1244 ) 

1245 

1246 def testAttributeManager(self): 

1247 """Test basic functionality of attribute manager.""" 

1248 # Number of attributes with schema versions in a fresh database: 

1249 # 6 managers with 3 records per manager, plus config for dimensions. 

1250 VERSION_COUNT = 6 * 3 + 1 

1251 

1252 registry = self.makeRegistry() 

1253 attributes = registry._managers.attributes 

1254 

1255 # check what get() returns for non-existing key 

1256 self.assertIsNone(attributes.get("attr")) 

1257 self.assertEqual(attributes.get("attr", ""), "") 

1258 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1259 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1260 

1261 # cannot store empty key or value 

1262 with self.assertRaises(ValueError): 

1263 attributes.set("", "value") 

1264 with self.assertRaises(ValueError): 

1265 attributes.set("attr", "") 

1266 

1267 # set value of non-existing key 

1268 attributes.set("attr", "value") 

1269 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1270 self.assertEqual(attributes.get("attr"), "value") 

1271 

1272 # update value of existing key 

1273 with self.assertRaises(ButlerAttributeExistsError): 

1274 attributes.set("attr", "value2") 

1275 

1276 attributes.set("attr", "value2", force=True) 

1277 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1278 self.assertEqual(attributes.get("attr"), "value2") 

1279 

1280 # delete existing key 

1281 self.assertTrue(attributes.delete("attr")) 

1282 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1283 

1284 # delete non-existing key 

1285 self.assertFalse(attributes.delete("non-attr")) 

1286 

1287 # store a bunch of keys and get the list back 

1288 data = [ 

1289 ("version.core", "1.2.3"), 

1290 ("version.dimensions", "3.2.1"), 

1291 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1292 ] 

1293 for key, value in data: 

1294 attributes.set(key, value) 

1295 items = dict(attributes.items()) 

1296 for key, value in data: 

1297 self.assertEqual(items[key], value) 

1298 

1299 def testQueryDatasetsDeduplication(self): 

1300 """Test that the findFirst option to queryDatasets selects datasets 

1301 from collections in the order given. 

1302 """ 

1303 registry = self.makeRegistry() 

1304 self.loadData(registry, "base.yaml") 

1305 self.loadData(registry, "datasets.yaml") 

1306 self.assertCountEqual( 

1307 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1308 [ 

1309 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1310 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1311 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1312 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1313 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1314 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1315 ], 

1316 ) 

1317 self.assertCountEqual( 

1318 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1319 [ 

1320 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1321 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1322 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1323 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1324 ], 

1325 ) 

1326 self.assertCountEqual( 

1327 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1328 [ 

1329 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1330 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1331 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1332 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1333 ], 

1334 ) 

1335 

1336 def testQueryResults(self): 

1337 """Test querying for data IDs and then manipulating the QueryResults 

1338 object returned to perform other queries. 

1339 """ 

1340 registry = self.makeRegistry() 

1341 self.loadData(registry, "base.yaml") 

1342 self.loadData(registry, "datasets.yaml") 

1343 bias = registry.getDatasetType("bias") 

1344 flat = registry.getDatasetType("flat") 

1345 # Obtain expected results from methods other than those we're testing 

1346 # here. That includes: 

1347 # - the dimensions of the data IDs we want to query: 

1348 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1349 # - the dimensions of some other data IDs we'll extract from that: 

1350 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1351 # - the data IDs we expect to obtain from the first queries: 

1352 expectedDataIds = DataCoordinateSet( 

1353 { 

1354 DataCoordinate.standardize( 

1355 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1356 ) 

1357 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1358 }, 

1359 graph=expectedGraph, 

1360 hasFull=False, 

1361 hasRecords=False, 

1362 ) 

1363 # - the flat datasets we expect to find from those data IDs, in just 

1364 # one collection (so deduplication is irrelevant): 

1365 expectedFlats = [ 

1366 registry.findDataset( 

1367 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1368 ), 

1369 registry.findDataset( 

1370 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1371 ), 

1372 registry.findDataset( 

1373 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1374 ), 

1375 ] 

1376 # - the data IDs we expect to extract from that: 

1377 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1378 # - the bias datasets we expect to find from those data IDs, after we 

1379 # subset-out the physical_filter dimension, both with duplicates: 

1380 expectedAllBiases = [ 

1381 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1382 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1383 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1384 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1385 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1386 ] 

1387 # - ...and without duplicates: 

1388 expectedDeduplicatedBiases = [ 

1389 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1390 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1391 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1392 ] 

1393 # Test against those expected results, using a "lazy" query for the 

1394 # data IDs (which re-executes that query each time we use it to do 

1395 # something new). 

1396 dataIds = registry.queryDataIds( 

1397 ["detector", "physical_filter"], 

1398 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1399 instrument="Cam1", 

1400 ) 

1401 self.assertEqual(dataIds.graph, expectedGraph) 

1402 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1403 self.assertCountEqual( 

1404 list( 

1405 dataIds.findDatasets( 

1406 flat, 

1407 collections=["imported_r"], 

1408 ) 

1409 ), 

1410 expectedFlats, 

1411 ) 

1412 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1413 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1414 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1415 self.assertCountEqual( 

1416 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1417 expectedAllBiases, 

1418 ) 

1419 self.assertCountEqual( 

1420 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1421 expectedDeduplicatedBiases, 

1422 ) 

1423 

1424 # Check dimensions match. 

1425 with self.assertRaises(ValueError): 

1426 subsetDataIds.findDatasets("flat", collections=["imported_r", "imported_g"], findFirst=True) 

1427 

1428 # Use a component dataset type. 

1429 self.assertCountEqual( 

1430 [ 

1431 ref.makeComponentRef("image") 

1432 for ref in subsetDataIds.findDatasets( 

1433 bias, 

1434 collections=["imported_r", "imported_g"], 

1435 findFirst=False, 

1436 ) 

1437 ], 

1438 [ref.makeComponentRef("image") for ref in expectedAllBiases], 

1439 ) 

1440 

1441 # Use a named dataset type that does not exist and a dataset type 

1442 # object that does not exist. 

1443 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure") 

1444 

1445 # Test both string name and dataset type object. 

1446 test_type: Union[str, DatasetType] 

1447 for test_type, test_type_name in ( 

1448 (unknown_type, unknown_type.name), 

1449 (unknown_type.name, unknown_type.name), 

1450 ): 

1451 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name): 

1452 list( 

1453 subsetDataIds.findDatasets( 

1454 test_type, collections=["imported_r", "imported_g"], findFirst=True 

1455 ) 

1456 ) 

1457 

1458 # Materialize the bias dataset queries (only) by putting the results 

1459 # into temporary tables, then repeat those tests. 

1460 with subsetDataIds.findDatasets( 

1461 bias, collections=["imported_r", "imported_g"], findFirst=False 

1462 ).materialize() as biases: 

1463 self.assertCountEqual(list(biases), expectedAllBiases) 

1464 with subsetDataIds.findDatasets( 

1465 bias, collections=["imported_r", "imported_g"], findFirst=True 

1466 ).materialize() as biases: 

1467 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1468 # Materialize the data ID subset query, but not the dataset queries. 

1469 with subsetDataIds.materialize() as subsetDataIds: 

1470 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1471 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1472 self.assertCountEqual( 

1473 list( 

1474 subsetDataIds.findDatasets( 

1475 bias, collections=["imported_r", "imported_g"], findFirst=False 

1476 ) 

1477 ), 

1478 expectedAllBiases, 

1479 ) 

1480 self.assertCountEqual( 

1481 list( 

1482 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1483 ), 

1484 expectedDeduplicatedBiases, 

1485 ) 

1486 # Materialize the dataset queries, too. 

1487 with subsetDataIds.findDatasets( 

1488 bias, collections=["imported_r", "imported_g"], findFirst=False 

1489 ).materialize() as biases: 

1490 self.assertCountEqual(list(biases), expectedAllBiases) 

1491 with subsetDataIds.findDatasets( 

1492 bias, collections=["imported_r", "imported_g"], findFirst=True 

1493 ).materialize() as biases: 

1494 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1495 # Materialize the original query, but none of the follow-up queries. 

1496 with dataIds.materialize() as dataIds: 

1497 self.assertEqual(dataIds.graph, expectedGraph) 

1498 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1499 self.assertCountEqual( 

1500 list( 

1501 dataIds.findDatasets( 

1502 flat, 

1503 collections=["imported_r"], 

1504 ) 

1505 ), 

1506 expectedFlats, 

1507 ) 

1508 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1509 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1510 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1511 self.assertCountEqual( 

1512 list( 

1513 subsetDataIds.findDatasets( 

1514 bias, collections=["imported_r", "imported_g"], findFirst=False 

1515 ) 

1516 ), 

1517 expectedAllBiases, 

1518 ) 

1519 self.assertCountEqual( 

1520 list( 

1521 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1522 ), 

1523 expectedDeduplicatedBiases, 

1524 ) 

1525 # Materialize just the bias dataset queries. 

1526 with subsetDataIds.findDatasets( 

1527 bias, collections=["imported_r", "imported_g"], findFirst=False 

1528 ).materialize() as biases: 

1529 self.assertCountEqual(list(biases), expectedAllBiases) 

1530 with subsetDataIds.findDatasets( 

1531 bias, collections=["imported_r", "imported_g"], findFirst=True 

1532 ).materialize() as biases: 

1533 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1534 # Materialize the subset data ID query, but not the dataset 

1535 # queries. 

1536 with subsetDataIds.materialize() as subsetDataIds: 

1537 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1538 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1539 self.assertCountEqual( 

1540 list( 

1541 subsetDataIds.findDatasets( 

1542 bias, collections=["imported_r", "imported_g"], findFirst=False 

1543 ) 

1544 ), 

1545 expectedAllBiases, 

1546 ) 

1547 self.assertCountEqual( 

1548 list( 

1549 subsetDataIds.findDatasets( 

1550 bias, collections=["imported_r", "imported_g"], findFirst=True 

1551 ) 

1552 ), 

1553 expectedDeduplicatedBiases, 

1554 ) 

1555 # Materialize the bias dataset queries, too, so now we're 

1556 # materializing every single step. 

1557 with subsetDataIds.findDatasets( 

1558 bias, collections=["imported_r", "imported_g"], findFirst=False 

1559 ).materialize() as biases: 

1560 self.assertCountEqual(list(biases), expectedAllBiases) 

1561 with subsetDataIds.findDatasets( 

1562 bias, collections=["imported_r", "imported_g"], findFirst=True 

1563 ).materialize() as biases: 

1564 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1565 

1566 def testEmptyDimensionsQueries(self): 

1567 """Test Query and QueryResults objects in the case where there are no 

1568 dimensions. 

1569 """ 

1570 # Set up test data: one dataset type, two runs, one dataset in each. 

1571 registry = self.makeRegistry() 

1572 self.loadData(registry, "base.yaml") 

1573 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1574 registry.registerDatasetType(schema) 

1575 dataId = DataCoordinate.makeEmpty(registry.dimensions) 
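# The empty data ID identifies no dimensions at all; it is the unique
# (and only possible) data ID for dataset types with empty dimensions.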

1576 run1 = "run1" 

1577 run2 = "run2" 

1578 registry.registerRun(run1) 

1579 registry.registerRun(run2) 

1580 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1581 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1582 # Query directly for both of the datasets, and then for each one individually. 

1583 self.checkQueryResults( 

1584 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1585 ) 

1586 self.checkQueryResults( 

1587 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1588 [dataset1], 

1589 ) 

1590 self.checkQueryResults( 

1591 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1592 [dataset2], 

1593 ) 

1594 # Query for data IDs with no dimensions. 

1595 dataIds = registry.queryDataIds([]) 

1596 self.checkQueryResults(dataIds, [dataId]) 

1597 # Use queried data IDs to find the datasets. 

1598 self.checkQueryResults( 

1599 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1600 [dataset1, dataset2], 

1601 ) 

1602 self.checkQueryResults( 

1603 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1604 [dataset1], 

1605 ) 

1606 self.checkQueryResults( 

1607 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1608 [dataset2], 

1609 ) 

1610 # Now materialize the data ID query results and repeat those tests. 

1611 with dataIds.materialize() as dataIds: 

1612 self.checkQueryResults(dataIds, [dataId]) 

1613 self.checkQueryResults( 

1614 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1615 [dataset1], 

1616 ) 

1617 self.checkQueryResults( 

1618 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1619 [dataset2], 

1620 ) 

1621 # Query for non-empty data IDs, then subset that to get the empty one. 

1622 # Repeat the above tests starting from that. 

1623 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1624 self.checkQueryResults(dataIds, [dataId]) 

1625 self.checkQueryResults( 

1626 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1627 [dataset1, dataset2], 

1628 ) 

1629 self.checkQueryResults( 

1630 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1631 [dataset1], 

1632 ) 

1633 self.checkQueryResults( 

1634 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1635 [dataset2], 

1636 ) 

1637 with dataIds.materialize() as dataIds: 

1638 self.checkQueryResults(dataIds, [dataId]) 

1639 self.checkQueryResults( 

1640 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1641 [dataset1, dataset2], 

1642 ) 

1643 self.checkQueryResults( 

1644 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1645 [dataset1], 

1646 ) 

1647 self.checkQueryResults( 

1648 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1649 [dataset2], 

1650 ) 

1651 # Query for non-empty data IDs, then materialize, then subset to get 

1652 # the empty one. Repeat again. 

1653 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1654 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1655 self.checkQueryResults(dataIds, [dataId]) 

1656 self.checkQueryResults( 

1657 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1658 [dataset1, dataset2], 

1659 ) 

1660 self.checkQueryResults( 

1661 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1662 [dataset1], 

1663 ) 

1664 self.checkQueryResults( 

1665 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1666 [dataset2], 

1667 ) 

1668 with dataIds.materialize() as dataIds: 

1669 self.checkQueryResults(dataIds, [dataId]) 

1670 self.checkQueryResults( 

1671 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1672 [dataset1, dataset2], 

1673 ) 

1674 self.checkQueryResults( 

1675 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1676 [dataset1], 

1677 ) 

1678 self.checkQueryResults( 

1679 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1680 [dataset2], 

1681 ) 

1682 # Query for non-empty data IDs with a constraint on an empty-data-ID 

1683 # dataset that exists. 

1684 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...) 

1685 self.checkQueryResults( 

1686 dataIds.subset(unique=True), 

1687 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)], 

1688 ) 

1689 # Again query for non-empty data IDs with a constraint on empty-data-ID 

1690 # datasets, but now with no matching datasets. We delete the existing 

1691 # dataset and query just that collection, rather than creating a new 

1692 # empty collection, because our query-building logic is less likely to 

1693 # shortcut that case out (via the collection summaries), and such a 

1694 # shortcut would make this test more trivial than we'd like. 

1695 registry.removeDatasets([dataset2]) 

1696 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2) 

1697 self.checkQueryResults(dataIds, []) 

1698 

1699 def testDimensionDataModifications(self): 

1700 """Test that modifying dimension records via: 

1701 syncDimensionData(..., update=True) and 

1702 insertDimensionData(..., replace=True) works as expected, even in the 

1703 presence of datasets using those dimensions and spatial overlap 

1704 relationships. 

1705 """ 

1706 

1707 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1708 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1709 for begin, end in ranges: 

1710 yield from range(begin, end) 

1711 

1712 def range_set_hull( 

1713 ranges: lsst.sphgeom.RangeSet, 

1714 pixelization: lsst.sphgeom.HtmPixelization, 

1715 ) -> lsst.sphgeom.ConvexPolygon: 

1716 """Create a ConvexPolygon hull of the region defined by a set of 

1717 HTM pixelization index ranges. 

1718 """ 

1719 points = [] 

1720 for index in unpack_range_set(ranges): 

1721 points.extend(pixelization.triangle(index).getVertices()) 

1722 return lsst.sphgeom.ConvexPolygon(points) 

1723 

1724 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1725 # and four child regions (the trixels within the parent at the next 

1726 # level). We'll use the parent as a tract/visit region and the children 

1727 # as its patch/visit_detector regions. 

1728 registry = self.makeRegistry() 

1729 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1730 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1731 index = 12288 

1732 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 
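# 12288 is an arbitrary level-5 HTM trixel index; a trixel's children
# at the next level have indices 4*i through 4*i + 3, so scaling the
# one-index RangeSet by 4 yields exactly the four level-6 children.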

1733 assert htm6.universe().contains(child_ranges_small) 

1734 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)] 

1735 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1736 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1737 ) 

1738 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1739 # Make a larger version of each child region, defined to be the set of 

1740 # htm6 trixels that overlap the original's bounding circle. Make a new 

1741 # parent that's the convex hull of the new children. 

1742 child_regions_large = [ 

1743 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1744 ] 

1745 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small)) 

1746 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1747 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1748 ) 

1749 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1750 assert parent_region_large.contains(parent_region_small) 

1751 assert not parent_region_small.contains(parent_region_large) 

1752 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1753 # Find some commonSkyPix indices that overlap the large regions but not 

1754 # overlap the small regions. We use commonSkyPix here to make sure the 

1755 # real tests later involve what's in the database, not just post-query 

1756 # region filtering. 

1757 child_difference_indices = [] 

1758 for large, small in zip(child_regions_large, child_regions_small): 

1759 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1760 assert difference, "if this is empty, we can't test anything useful with these regions" 

1761 assert all( 

1762 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1763 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1764 for d in difference 

1765 ) 

1766 child_difference_indices.append(difference) 

1767 parent_difference_indices = list( 

1768 unpack_range_set( 

1769 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1770 ) 

1771 ) 

1772 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1773 assert all( 

1774 ( 

1775 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1776 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1777 ) 

1778 for d in parent_difference_indices 

1779 ) 

1780 # Now that we've finally got those regions, we'll insert the large ones 

1781 # as tract/patch dimension records. 

1782 skymap_name = "testing_v1" 

1783 registry.insertDimensionData( 

1784 "skymap", 

1785 { 

1786 "name": skymap_name, 

1787 "hash": bytes([42]), 

1788 "tract_max": 1, 

1789 "patch_nx_max": 2, 

1790 "patch_ny_max": 2, 

1791 }, 

1792 ) 

1793 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

1794 registry.insertDimensionData( 

1795 "patch", 

1796 *[ 

1797 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1798 for n, c in enumerate(child_regions_large) 

1799 ], 

1800 ) 

1801 # Add a dataset that uses these dimensions to make sure that modifying 

1802 # them doesn't disrupt foreign keys (we need to make sure the DB doesn't 

1803 # implement insert with replace=True as delete-then-insert). 

1804 dataset_type = DatasetType( 

1805 "coadd", 

1806 dimensions=["tract", "patch"], 

1807 universe=registry.dimensions, 

1808 storageClass="Exposure", 

1809 ) 

1810 registry.registerDatasetType(dataset_type) 

1811 registry.registerCollection("the_run", CollectionType.RUN) 

1812 registry.insertDatasets( 

1813 dataset_type, 

1814 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

1815 run="the_run", 

1816 ) 

1817 # Query for tracts and patches that overlap some "difference" 

1818 # commonSkyPix pixels; there should be overlaps, because the database 

1819 # has the "large" suite of regions. 

1820 self.assertEqual( 

1821 {0}, 

1822 { 

1823 data_id["tract"] 

1824 for data_id in registry.queryDataIds( 

1825 ["tract"], 

1826 skymap=skymap_name, 

1827 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1828 ) 

1829 }, 

1830 ) 

1831 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1832 self.assertIn( 

1833 patch_id, 

1834 { 

1835 data_id["patch"] 

1836 for data_id in registry.queryDataIds( 

1837 ["patch"], 

1838 skymap=skymap_name, 

1839 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1840 ) 

1841 }, 

1842 ) 

1843 # Use sync to update the tract region and insert to update the patch 

1844 # regions, to the "small" suite. 

1845 updated = registry.syncDimensionData( 

1846 "tract", 

1847 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

1848 update=True, 

1849 ) 

1850 self.assertEqual(updated, {"region": parent_region_large}) 

1851 registry.insertDimensionData( 

1852 "patch", 

1853 *[ 

1854 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1855 for n, c in enumerate(child_regions_small) 

1856 ], 

1857 replace=True, 

1858 ) 

1859 # Query again; there now should be no such overlaps, because the 

1860 # database has the "small" suite of regions. 

1861 self.assertFalse( 

1862 set( 

1863 registry.queryDataIds( 

1864 ["tract"], 

1865 skymap=skymap_name, 

1866 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1867 ) 

1868 ) 

1869 ) 

1870 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1871 self.assertNotIn( 

1872 patch_id, 

1873 { 

1874 data_id["patch"] 

1875 for data_id in registry.queryDataIds( 

1876 ["patch"], 

1877 skymap=skymap_name, 

1878 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1879 ) 

1880 }, 

1881 ) 

1882 # Update back to the large regions and query one more time. 

1883 updated = registry.syncDimensionData( 

1884 "tract", 

1885 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

1886 update=True, 

1887 ) 

1888 self.assertEqual(updated, {"region": parent_region_small}) 

1889 registry.insertDimensionData( 

1890 "patch", 

1891 *[ 

1892 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1893 for n, c in enumerate(child_regions_large) 

1894 ], 

1895 replace=True, 

1896 ) 

1897 self.assertEqual( 

1898 {0}, 

1899 { 

1900 data_id["tract"] 

1901 for data_id in registry.queryDataIds( 

1902 ["tract"], 

1903 skymap=skymap_name, 

1904 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1905 ) 

1906 }, 

1907 ) 

1908 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1909 self.assertIn( 

1910 patch_id, 

1911 { 

1912 data_id["patch"] 

1913 for data_id in registry.queryDataIds( 

1914 ["patch"], 

1915 skymap=skymap_name, 

1916 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1917 ) 

1918 }, 

1919 ) 

1920 

1921 def testCalibrationCollections(self): 

1922 """Test operations on `~CollectionType.CALIBRATION` collections, 

1923 including `Registry.certify`, `Registry.decertify`, and 

1924 `Registry.findDataset`. 

1925 """ 

1926 # Setup - make a Registry, fill it with some datasets in 

1927 # non-calibration collections. 

1928 registry = self.makeRegistry() 

1929 self.loadData(registry, "base.yaml") 

1930 self.loadData(registry, "datasets.yaml") 

1931 # Set up some timestamps. 

1932 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

1933 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

1934 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

1935 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

1936 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

1937 allTimespans = [ 

1938 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

1939 ] 
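# t1 through t5 are one hour apart; with None at both ends of the input
# sequence, this enumerates every ordered pair of bounds, including
# timespans unbounded on either side.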

1940 # Get references to some datasets. 

1941 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

1942 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

1943 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

1944 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

1945 # Register the main calibration collection we'll be working with. 

1946 collection = "Cam1/calibs/default" 

1947 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

1948 # Cannot associate into a calibration collection (no timespan). 

1949 with self.assertRaises(CollectionTypeError): 

1950 registry.associate(collection, [bias2a]) 

1951 # Certify 2a dataset with [t2, t4) validity. 

1952 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

1953 # Test that we can query for this dataset via the new collection, both 

1954 # on its own and with a RUN collection, as long as we don't try to join 

1955 # in temporal dimensions or use findFirst=True. 

1956 self.assertEqual( 

1957 set(registry.queryDatasets("bias", findFirst=False, collections=collection)), 

1958 {bias2a}, 

1959 ) 

1960 self.assertEqual( 

1961 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])), 

1962 { 

1963 bias2a, 

1964 bias2b, 

1965 bias3b, 

1966 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1967 }, 

1968 ) 

1969 self.assertEqual( 

1970 set(registry.queryDataIds("detector", datasets="bias", collections=collection)), 

1971 {registry.expandDataId(instrument="Cam1", detector=2)}, 

1972 ) 

1973 self.assertEqual( 

1974 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])), 

1975 { 

1976 registry.expandDataId(instrument="Cam1", detector=2), 

1977 registry.expandDataId(instrument="Cam1", detector=3), 

1978 registry.expandDataId(instrument="Cam1", detector=4), 

1979 }, 

1980 ) 

1981 

1982 # We should not be able to certify 2b with anything overlapping that 

1983 # window. 

1984 with self.assertRaises(ConflictingDefinitionError): 

1985 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

1986 with self.assertRaises(ConflictingDefinitionError): 

1987 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

1988 with self.assertRaises(ConflictingDefinitionError): 

1989 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

1990 with self.assertRaises(ConflictingDefinitionError): 

1991 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

1992 with self.assertRaises(ConflictingDefinitionError): 

1993 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

1994 with self.assertRaises(ConflictingDefinitionError): 

1995 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

1996 with self.assertRaises(ConflictingDefinitionError): 

1997 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

1998 with self.assertRaises(ConflictingDefinitionError): 

1999 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

2000 # We should be able to certify 3a with a range overlapping that window, 

2001 # because it's for a different detector. 

2002 # We'll certify 3a over [t1, t3). 

2003 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

2004 # Now we'll certify 2b and 3b together over [t4, ∞). 

2005 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

2006 

2007 # Fetch all associations and check that they are what we expect. 

2008 self.assertCountEqual( 

2009 list( 

2010 registry.queryDatasetAssociations( 

2011 "bias", 

2012 collections=[collection, "imported_g", "imported_r"], 

2013 ) 

2014 ), 

2015 [ 

2016 DatasetAssociation( 

2017 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

2018 collection="imported_g", 

2019 timespan=None, 

2020 ), 

2021 DatasetAssociation( 

2022 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2023 collection="imported_r", 

2024 timespan=None, 

2025 ), 

2026 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

2027 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

2028 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

2029 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

2030 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

2031 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

2032 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2033 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2034 ], 

2035 ) 

2036 

2037 class Ambiguous: 

2038 """Tag class to denote lookups that should be ambiguous.""" 

2039 

2040 pass 

2041 

2042 def assertLookup( 

2043 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]] 

2044 ) -> None: 

2045 """Local function that asserts that a bias lookup returns the given 

2046 expected result. 

2047 """ 

2048 if expected is Ambiguous: 

2049 with self.assertRaises(RuntimeError): 

2050 registry.findDataset( 

2051 "bias", 

2052 collections=collection, 

2053 instrument="Cam1", 

2054 detector=detector, 

2055 timespan=timespan, 

2056 ) 

2057 else: 

2058 self.assertEqual( 

2059 expected, 

2060 registry.findDataset( 

2061 "bias", 

2062 collections=collection, 

2063 instrument="Cam1", 

2064 detector=detector, 

2065 timespan=timespan, 

2066 ), 

2067 ) 

2068 
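# Certification state at this point: bias2a over [t2, t4), bias3a over
# [t1, t3), and bias2b and bias3b over [t4, ∞).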

2069 # Systematically test lookups against expected results. 

2070 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2071 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2072 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2073 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2074 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

2075 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2076 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2077 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2078 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2079 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

2080 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2081 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2082 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2083 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

2084 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2085 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

2086 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

2087 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

2088 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

2089 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2090 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2091 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2092 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2093 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2094 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2095 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

2096 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2097 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2098 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2099 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2100 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

2101 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2102 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2103 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2104 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2105 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2106 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2107 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2108 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2109 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2110 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2111 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2112 

2113 # Decertify [t3, t5) for all data IDs, and run the test lookups again. 

2114 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2115 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2116 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2117 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2118 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2119 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2120 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2121 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2122 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2123 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2124 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2125 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2126 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2127 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2128 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2129 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2130 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2131 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2132 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2133 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2134 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2135 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2136 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2137 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2138 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2139 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2140 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2141 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2142 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2143 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2144 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2145 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2146 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2147 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2148 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2149 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2150 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2151 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2152 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2153 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2154 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2155 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2156 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2157 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2158 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2159 

2160 # Decertify everything, this time with explicit data IDs, then check 

2161 # that no lookups succeed. 

2162 registry.decertify( 

2163 collection, 

2164 "bias", 

2165 Timespan(None, None), 

2166 dataIds=[ 

2167 dict(instrument="Cam1", detector=2), 

2168 dict(instrument="Cam1", detector=3), 

2169 ], 

2170 ) 

2171 for detector in (2, 3): 

2172 for timespan in allTimespans: 

2173 assertLookup(detector=detector, timespan=timespan, expected=None) 

2174 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2175 # those. 

2176 registry.certify( 

2177 collection, 

2178 [bias2a, bias3a], 

2179 Timespan(None, None), 

2180 ) 

2181 for timespan in allTimespans: 

2182 assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2183 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2184 # Decertify just bias2 over [t2, t4). 

2185 # This should split a single certification row into two (and leave the 

2186 # other existing row, for bias3a, alone). 

2187 registry.decertify( 

2188 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2189 ) 

2190 for timespan in allTimespans: 

2191 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2192 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2193 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2194 if overlapsBefore and overlapsAfter: 

2195 expected = Ambiguous 

2196 elif overlapsBefore or overlapsAfter: 

2197 expected = bias2a 

2198 else: 

2199 expected = None 

2200 assertLookup(detector=2, timespan=timespan, expected=expected) 

2201 

2202 def testSkipCalibs(self): 

2203 """Test how queries handle skipping of calibration collections.""" 

2204 registry = self.makeRegistry() 

2205 self.loadData(registry, "base.yaml") 

2206 self.loadData(registry, "datasets.yaml") 

2207 

2208 coll_calib = "Cam1/calibs/default" 

2209 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2210 

2211 # Add all biases to the calibration collection. 

2212 # Without this, the logic that prunes dataset subqueries based on 

2213 # datasetType-collection summary information will fire before the logic 

2214 # we want to test below. This is a good thing (it avoids the dreaded 

2215 # NotImplementedError a bit more often) everywhere but here. 

2216 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2217 

2218 coll_list = [coll_calib, "imported_g", "imported_r"] 

2219 chain = "Cam1/chain" 

2220 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2221 registry.setCollectionChain(chain, coll_list) 
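# Searching an explicit collection list that includes a CALIBRATION
# collection requires a temporal lookup that is not implemented for
# these query types, so it raises; chained collections and collection
# patterns instead skip the calibration collection silently.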

2222 

2223 # explicit list will raise if findFirst=True or there are temporal 

2224 # dimensions 

2225 with self.assertRaises(NotImplementedError): 

2226 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2227 with self.assertRaises(NotImplementedError): 

2228 registry.queryDataIds( 

2229 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2230 ).count() 

2231 

2232 # chain will skip 

2233 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2234 self.assertGreater(len(datasets), 0) 

2235 

2236 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2237 self.assertGreater(len(dataIds), 0) 

2238 

2239 # glob will skip too 

2240 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2241 self.assertGreater(len(datasets), 0) 

2242 

2243 # regular expression will skip too 

2244 pattern = re.compile(".*") 

2245 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2246 self.assertGreater(len(datasets), 0) 

2247 

2248 # ellipsis should work as usual 

2249 datasets = list(registry.queryDatasets("bias", collections=...)) 

2250 self.assertGreater(len(datasets), 0) 

2251 

2252 # few tests with findFirst 

2253 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2254 self.assertGreater(len(datasets), 0) 

2255 

2256 def testIngestTimeQuery(self): 

2257 

2258 registry = self.makeRegistry() 

2259 self.loadData(registry, "base.yaml") 

2260 dt0 = datetime.utcnow() 

2261 self.loadData(registry, "datasets.yaml") 

2262 dt1 = datetime.utcnow() 

2263 

2264 datasets = list(registry.queryDatasets(..., collections=...)) 

2265 len0 = len(datasets) 

2266 self.assertGreater(len0, 0) 

2267 

2268 where = "ingest_date > T'2000-01-01'" 

2269 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2270 len1 = len(datasets) 

2271 self.assertEqual(len0, len1) 

2272 

2273 # no one will ever use this piece of software in 30 years 

2274 where = "ingest_date > T'2050-01-01'" 

2275 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2276 len2 = len(datasets) 

2277 self.assertEqual(len2, 0) 

2278 

2279 # Check more exact timing to make sure there is no 37-second offset 

2280 # (after fixing DM-30124). SQLite time precision is 1 second; make 

2281 # sure that we don't test with higher precision. 

2282 tests = [ 

2283 # format: (timestamp, operator, expected_len) 

2284 (dt0 - timedelta(seconds=1), ">", len0), 

2285 (dt0 - timedelta(seconds=1), "<", 0), 

2286 (dt1 + timedelta(seconds=1), "<", len0), 

2287 (dt1 + timedelta(seconds=1), ">", 0), 

2288 ] 

2289 for dt, op, expect_len in tests: 

2290 dt_str = dt.isoformat(sep=" ") 

2291 

2292 where = f"ingest_date {op} T'{dt_str}'" 

2293 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2294 self.assertEqual(len(datasets), expect_len) 

2295 

2296 # same with bind using datetime or astropy Time 

2297 where = f"ingest_date {op} ingest_time" 

2298 datasets = list( 

2299 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2300 ) 

2301 self.assertEqual(len(datasets), expect_len) 

2302 

2303 dt_astropy = astropy.time.Time(dt, format="datetime") 

2304 datasets = list( 

2305 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2306 ) 

2307 self.assertEqual(len(datasets), expect_len) 

2308 

2309 def testTimespanQueries(self): 

2310 """Test query expressions involving timespans.""" 

2311 registry = self.makeRegistry() 

2312 self.loadData(registry, "hsc-rc2-subset.yaml") 

2313 # All visits in the database; mapping from ID to timespan. 

2314 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2315 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2316 # visit IDs are monotonically increasing). 

2317 ids = sorted(visits.keys()) 

2318 self.assertGreater(len(ids), 20) 

2319 # Pick some quasi-random indexes into `ids` to play with. 

2320 i1 = int(len(ids) * 0.1) 

2321 i2 = int(len(ids) * 0.3) 

2322 i3 = int(len(ids) * 0.6) 

2323 i4 = int(len(ids) * 0.8) 

2324 # Extract some times from those: just before the beginning of i1 (which 

2325 # should be after the end of the previous visit), exactly the 

2326 # beginning of i2, just after the beginning of i3 (and before its end), 

2327 # and the exact end of i4. 

2328 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2329 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2330 t2 = visits[ids[i2]].begin 

2331 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2332 self.assertLess(t3, visits[ids[i3]].end) 

2333 t4 = visits[ids[i4]].end 

2334 # Make sure those are actually in order. 

2335 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2336 

2337 bind = { 

2338 "t1": t1, 

2339 "t2": t2, 

2340 "t3": t3, 

2341 "t4": t4, 

2342 "ts23": Timespan(t2, t3), 

2343 } 

2344 

2345 def query(where): 

2346 """Helper function that queries for visit data IDs and returns 

2347 results as a sorted, deduplicated list of visit IDs. 

2348 """ 

2349 return sorted( 

2350 { 

2351 dataId["visit"] 

2352 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2353 } 

2354 ) 
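# Timespan bounds are half-open: begin is inclusive and end is
# exclusive, which drives the edge cases at t2 and t4 below.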

2355 

2356 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2357 # where they appear in the expression, and how we get the timespan into 

2358 # the expression. 

2359 

2360 # t1 is before the start of i1, so this should not include i1. 

2361 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2362 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2363 # should not include i2. 

2364 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2365 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2366 # t3 is in the middle of i3, so this should include i3. 

2367 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2368 # This one should not include i3, by the same reasoning. 

2369 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2370 # t4 is exactly at the end of i4, so this should include i4. 

2371 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2372 # i4's upper bound of t4 is exclusive, so this should not include i4. 

2373 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2374 

2375 # Now some timespan vs. time scalar queries. 

2376 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2377 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2378 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2379 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2380 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2381 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2382 

2383 # Empty timespans should not overlap anything. 

2384 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 

2385 

2386 def testCollectionSummaries(self): 

2387 """Test recording and retrieval of collection summaries.""" 

2388 self.maxDiff = None 

2389 registry = self.makeRegistry() 

2390 # Importing datasets from yaml should go through the code path where 

2391 # we update collection summaries as we insert datasets. 

2392 self.loadData(registry, "base.yaml") 

2393 self.loadData(registry, "datasets.yaml") 

2394 flat = registry.getDatasetType("flat") 

2395 expected1 = CollectionSummary() 

2396 expected1.dataset_types.add(registry.getDatasetType("bias")) 

2397 expected1.add_data_ids( 

2398 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)] 

2399 ) 

2400 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2401 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2402 # Create a chained collection with both of the imported runs; the 

2403 # summary should be the same, because it's a union with itself. 

2404 chain = "chain" 

2405 registry.registerCollection(chain, CollectionType.CHAINED) 

2406 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2407 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2408 # Associate flats only into a tagged collection and a calibration 

2409 # collection to check summaries of those. 

2410 tag = "tag" 

2411 registry.registerCollection(tag, CollectionType.TAGGED) 

2412 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2413 calibs = "calibs" 

2414 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2415 registry.certify( 

2416 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2417 ) 

2418 expected2 = expected1.copy() 

2419 expected2.dataset_types.discard("bias") 

2420 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2421 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2422 # Explicitly calling Registry.refresh() should load those same 

2423 # summaries, via a totally different code path. 

2424 registry.refresh() 

2425 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2426 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2427 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2428 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 
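# A small follow-up sketch (not part of the original test, assuming the
# NamedValueSet.names view used elsewhere in daf_butler): copy() above
# produced an independent summary, so discarding "bias" from expected2
# did not affect expected1.
self.assertIn("bias", expected1.dataset_types.names)
self.assertNotIn("bias", expected2.dataset_types.names)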

2429 

2430 def testBindInQueryDatasets(self): 

2431 """Test that the bind parameter is correctly forwarded in 

2432 queryDatasets recursion. 

2433 """ 

2434 registry = self.makeRegistry() 

2435 # Load the base data and datasets; this test only exercises how 

2436 # ``bind`` values are forwarded through queryDatasets. 

2437 self.loadData(registry, "base.yaml") 

2438 self.loadData(registry, "datasets.yaml") 

2439 self.assertEqual( 

2440 set(registry.queryDatasets("flat", band="r", collections=...)), 

2441 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2442 ) 
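# A hedged companion sketch: the same bind mechanism applies to
# queryDataIds, so an identifier in ``where`` can stand in for a
# literal governor or dimension value (names here mirror the call
# above and are illustrative only).
data_ids = set(
    registry.queryDataIds(["physical_filter"], where="band = my_band", bind={"my_band": "r"})
)
self.assertEqual(data_ids, set(registry.queryDataIds(["physical_filter"], band="r")))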

2443 

2444 def testQueryResultSummaries(self): 

2445 """Test summary methods like `count`, `any`, and `explain_no_results` 

2446 on `DataCoordinateQueryResults` and `DatasetQueryResults`. 

2447 """ 

2448 registry = self.makeRegistry() 

2449 self.loadData(registry, "base.yaml") 

2450 self.loadData(registry, "datasets.yaml") 

2451 self.loadData(registry, "spatial.yaml") 

2452 # Default test dataset has two collections, each with both flats and 

2453 # biases. Add a new collection with only biases. 

2454 registry.registerCollection("biases", CollectionType.TAGGED) 

2455 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2456 # First query yields two results, and involves no postprocessing. 

2457 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2458 self.assertTrue(query1.any(execute=False, exact=False)) 

2459 self.assertTrue(query1.any(execute=True, exact=False)) 

2460 self.assertTrue(query1.any(execute=True, exact=True)) 

2461 self.assertEqual(query1.count(exact=False), 2) 

2462 self.assertEqual(query1.count(exact=True), 2) 

2463 self.assertFalse(list(query1.explain_no_results())) 

2464 # Second query should yield no results, but this isn't detectable 

2465 # unless we actually run a query. 

2466 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2467 self.assertTrue(query2.any(execute=False, exact=False)) 

2468 self.assertFalse(query2.any(execute=True, exact=False)) 

2469 self.assertFalse(query2.any(execute=True, exact=True)) 

2470 self.assertEqual(query2.count(exact=False), 0) 

2471 self.assertEqual(query2.count(exact=True), 0) 

2472 self.assertFalse(list(query2.explain_no_results())) 

2473 # These queries yield no results due to various problems that can be 

2474 # spotted prior to execution, yielding helpful diagnostics. 

2475 base_query = registry.queryDataIds(["detector", "physical_filter"]) 

2476 queries_and_snippets = [ 

2477 ( 

2478 # Dataset type name doesn't match any existing dataset types. 

2479 registry.queryDatasets("nonexistent", collections=...), 

2480 ["nonexistent"], 

2481 ), 

2482 ( 

2483 # Dataset type object isn't registered. 

2484 registry.queryDatasets( 

2485 DatasetType( 

2486 "nonexistent", 

2487 dimensions=["instrument"], 

2488 universe=registry.dimensions, 

2489 storageClass="Image", 

2490 ), 

2491 collections=..., 

2492 ), 

2493 ["nonexistent"], 

2494 ), 

2495 ( 

2496 # No datasets of this type in this collection. 

2497 registry.queryDatasets("flat", collections=["biases"]), 

2498 ["flat", "biases"], 

2499 ), 

2500 ( 

2501 # No datasets of this type in this collection. 

2502 base_query.findDatasets("flat", collections=["biases"]), 

2503 ["flat", "biases"], 

2504 ), 

2505 ( 

2506 # No collections matching at all. 

2507 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2508 ["potato"], 

2509 ), 

2510 ] 

2511 # The behavior of these additional queries is slated to change in the 

2512 # future, so we also check for deprecation warnings. 

2513 with self.assertWarns(FutureWarning): 

2514 queries_and_snippets.append( 

2515 ( 

2516 # Dataset type name doesn't match any existing dataset 

2517 # types. 

2518 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...), 

2519 ["nonexistent"], 

2520 ) 

2521 ) 

2522 with self.assertWarns(FutureWarning): 

2523 queries_and_snippets.append( 

2524 ( 

2525 # Dataset type name doesn't match any existing dataset 

2526 # types. 

2527 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...), 

2528 ["nonexistent"], 

2529 ) 

2530 ) 

2531 for query, snippets in queries_and_snippets: 

2532 self.assertFalse(query.any(execute=False, exact=False)) 

2533 self.assertFalse(query.any(execute=True, exact=False)) 

2534 self.assertFalse(query.any(execute=True, exact=True)) 

2535 self.assertEqual(query.count(exact=False), 0) 

2536 self.assertEqual(query.count(exact=True), 0) 

2537 messages = list(query.explain_no_results()) 

2538 self.assertTrue(messages) 

2539 # Want all expected snippets to appear in at least one message. 

2540 self.assertTrue( 

2541 any( 

2542 all(snippet in message for snippet in snippets) for message in messages 

2543 ), 

2544 messages, 

2545 ) 
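# Usage sketch for the diagnostics API exercised above:
# explain_no_results() yields plain strings intended for end users, so
# a caller can simply log or display them.
doomed = registry.queryDatasets("nonexistent", collections=...)
for message in doomed.explain_no_results():
    self.assertIsInstance(message, str)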

2546 

2547 # This query does yield results, but it should also emit a warning 

2548 # because passing dataset type patterns to queryDataIds is 

2549 # deprecated; here we just check for the warning. 

2550 with self.assertWarns(FutureWarning): 

2551 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...) 

2552 

2553 # These queries yield no results due to problems that can be identified 

2554 # by cheap follow-up queries, yielding helpful diagnostics. 

2555 for query, snippets in [ 

2556 ( 

2557 # No records for one of the involved dimensions. 

2558 registry.queryDataIds(["subfilter"]), 

2559 ["dimension records", "subfilter"], 

2560 ), 

2561 ( 

2562 # No records for one of the involved dimensions. 

2563 registry.queryDimensionRecords("subfilter"), 

2564 ["dimension records", "subfilter"], 

2565 ), 

2566 ]: 

2567 self.assertFalse(query.any(execute=True, exact=False)) 

2568 self.assertFalse(query.any(execute=True, exact=True)) 

2569 self.assertEqual(query.count(exact=True), 0) 

2570 messages = list(query.explain_no_results()) 

2571 self.assertTrue(messages) 

2572 # Want all expected snippets to appear in at least one message. 

2573 self.assertTrue( 

2574 any( 

2575 all(snippet in message for snippet in snippets) for message in messages 

2576 ), 

2577 messages, 

2578 ) 

2579 

2580 # This query yields four overlaps in the database, but one is filtered 

2581 # out in postprocessing. The count queries aren't accurate because 

2582 # they don't account for duplication that happens due to an internal 

2583 # join against commonSkyPix. 

2584 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2585 self.assertEqual( 

2586 { 

2587 DataCoordinate.standardize( 

2588 instrument="Cam1", 

2589 skymap="SkyMap1", 

2590 visit=v, 

2591 tract=t, 

2592 universe=registry.dimensions, 

2593 ) 

2594 for v, t in [(1, 0), (2, 0), (2, 1)] 

2595 }, 

2596 set(query3), 

2597 ) 

2598 self.assertTrue(query3.any(execute=False, exact=False)) 

2599 self.assertTrue(query3.any(execute=True, exact=False)) 

2600 self.assertTrue(query3.any(execute=True, exact=True)) 

2601 self.assertGreaterEqual(query3.count(exact=False), 4) 

2602 self.assertGreaterEqual(query3.count(exact=True), 3) 

2603 self.assertFalse(list(query3.explain_no_results())) 

2604 # This query yields overlaps in the database, but all are filtered 

2605 # out in postprocessing. The count queries again aren't very useful. 

2606 # We have to use `where=` here to avoid an optimization that 

2607 # (currently) skips the spatial postprocess-filtering because it 

2608 # recognizes that no spatial join is necessary. That's not ideal, but 

2609 # fixing it is out of scope for this ticket. 

2610 query4 = registry.queryDataIds( 

2611 ["visit", "tract"], 

2612 instrument="Cam1", 

2613 skymap="SkyMap1", 

2614 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2615 ) 

2616 self.assertFalse(set(query4)) 

2617 self.assertTrue(query4.any(execute=False, exact=False)) 

2618 self.assertTrue(query4.any(execute=True, exact=False)) 

2619 self.assertFalse(query4.any(execute=True, exact=True)) 

2620 self.assertGreaterEqual(query4.count(exact=False), 1) 

2621 self.assertEqual(query4.count(exact=True), 0) 

2622 messages = list(query4.explain_no_results()) 

2623 self.assertTrue(messages) 

2624 self.assertTrue(any("regions did not overlap" in message for message in messages)) 
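# Invariant sketch implied by the comments above: the cheap inexact
# count may overestimate (it skips postprocessing and deduplication)
# but should never undercount relative to the exact count.
self.assertLessEqual(query4.count(exact=True), query4.count(exact=False))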

2625 

2626 # There are also cases where a query yields empty results that we do 

2627 # not yet know how to explain. 

2628 query5 = registry.queryDimensionRecords( 

2629 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1" 

2630 ) 

2631 self.assertEqual(query5.count(exact=True), 0) 

2632 messages = list(query5.explain_no_results()) 

2633 self.assertFalse(messages) 

2634 # This query should yield results from one dataset type but not the 

2635 # other, which is not registered. 

2636 query6 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"]) 

2637 self.assertTrue(set(query6)) 

2638 self.assertTrue(query6.any(execute=False, exact=False)) 

2639 self.assertTrue(query6.any(execute=True, exact=False)) 

2640 self.assertTrue(query6.any(execute=True, exact=True)) 

2641 self.assertGreaterEqual(query6.count(exact=False), 1) 

2642 self.assertGreaterEqual(query6.count(exact=True), 1) 

2643 self.assertFalse(list(query6.explain_no_results())) 

2644 

2645 def testQueryDataIdsOrderBy(self): 

2646 """Test order_by and limit on result returned by queryDataIds().""" 

2647 registry = self.makeRegistry() 

2648 self.loadData(registry, "base.yaml") 

2649 self.loadData(registry, "datasets.yaml") 

2650 self.loadData(registry, "spatial.yaml") 

2651 

2652 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

2653 return registry.queryDataIds( 

2654 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

2655 ) 

2656 

2657 Test = namedtuple( 

2658 "testQueryDataIdsOrderByTest", 

2659 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

2660 defaults=(None, None, None), 

2661 ) 

2662 

2663 test_data = ( 

2664 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2665 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

2666 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

2667 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

2668 Test( 

2669 "tract.id,visit.id", 

2670 "tract,visit", 

2671 ((0, 1), (0, 1), (0, 2)), 

2672 limit=(3,), 

2673 ), 

2674 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

2675 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

2676 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

2677 Test( 

2678 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

2679 ), 

2680 Test( 

2681 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

2682 ), 

2683 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2684 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2685 Test( 

2686 "tract,-timespan.begin,timespan.end", 

2687 "tract,visit", 

2688 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

2689 ), 

2690 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

2691 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

2692 Test( 

2693 "tract,detector", 

2694 "tract,detector", 

2695 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2696 datasets="flat", 

2697 collections="imported_r", 

2698 ), 

2699 Test( 

2700 "tract,detector.full_name", 

2701 "tract,detector", 

2702 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2703 datasets="flat", 

2704 collections="imported_r", 

2705 ), 

2706 Test( 

2707 "tract,detector.raft,detector.name_in_raft", 

2708 "tract,detector", 

2709 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2710 datasets="flat", 

2711 collections="imported_r", 

2712 ), 

2713 ) 

2714 

2715 for test in test_data: 

2716 order_by = test.order_by.split(",") 

2717 keys = test.keys.split(",") 

2718 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

2719 if test.limit is not None: 

2720 query = query.limit(*test.limit) 

2721 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

2722 self.assertEqual(dataIds, test.result) 

2723 

2724 # and materialize 

2725 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

2726 if test.limit is not None: 

2727 query = query.limit(*test.limit) 

2728 with query.materialize() as materialized: 

2729 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in materialized) 

2730 self.assertEqual(dataIds, test.result) 

2731 

2732 # errors in a name 

2733 for order_by in ("", "-"): 

2734 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2735 list(do_query().order_by(order_by)) 

2736 

2737 for order_by in ("undimension.name", "-undimension.name"): 

2738 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"): 

2739 list(do_query().order_by(order_by)) 

2740 

2741 for order_by in ("attract", "-attract"): 

2742 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

2743 list(do_query().order_by(order_by)) 

2744 

2745 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

2746 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

2747 

2748 with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimesion"): 

2749 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

2750 

2751 with self.assertRaisesRegex( 

2752 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

2753 ): 

2754 list(do_query(("tract")).order_by("timespan.begin")) 

2755 

2756 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

2757 list(do_query(("tract")).order_by("tract.timespan.begin")) 

2758 

2759 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

2760 list(do_query(("tract")).order_by("tract.name")) 
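# A compact usage sketch tying the table above together: a "-" prefix
# reverses the sort, and "element.field" qualifies metadata fields
# explicitly; limit() then truncates the ordered results.
first_two = list(do_query().order_by("tract", "-visit.exposure_time").limit(2))
self.assertEqual(len(first_two), 2)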

2761 

2762 def testQueryDataIdsGovernorExceptions(self): 

2763 """Test exceptions raised by queryDataIds() for incorrect governors.""" 

2764 registry = self.makeRegistry() 

2765 self.loadData(registry, "base.yaml") 

2766 self.loadData(registry, "datasets.yaml") 

2767 self.loadData(registry, "spatial.yaml") 

2768 

2769 def do_query(dimensions, dataId=None, where=None, bind=None, **kwargs): 

2770 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs) 

2771 

2772 Test = namedtuple( 

2773 "testQueryDataIdExceptionsTest", 

2774 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"), 

2775 defaults=(None, None, None, {}, None, 0), 

2776 ) 

2777 

2778 test_data = ( 

2779 Test("tract,visit", count=6), 

2780 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2781 Test( 

2782 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError 

2783 ), 

2784 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2785 Test( 

2786 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError 

2787 ), 

2788 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6), 

2789 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError), 

2790 Test( 

2791 "tract,visit", 

2792 where="instrument=cam AND skymap=map", 

2793 bind={"cam": "Cam1", "map": "SkyMap1"}, 

2794 count=6, 

2795 ), 

2796 Test( 

2797 "tract,visit", 

2798 where="instrument=cam AND skymap=map", 

2799 bind={"cam": "Cam", "map": "SkyMap"}, 

2800 exception=DataIdValueError, 

2801 ), 

2802 ) 

2803 

2804 for test in test_data: 

2805 dimensions = test.dimensions.split(",") 

2806 if test.exception: 

2807 with self.assertRaises(test.exception): 

2808 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() 

2809 else: 

2810 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2811 self.assertEqual(query.count(), test.count) 

2812 

2813 # and materialize 

2814 if test.exception: 

2815 with self.assertRaises(test.exception): 

2816 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2817 with query.materialize() as materialized: 

2818 materialized.count() 

2819 else: 

2820 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2821 with query.materialize() as materialized: 

2822 self.assertEqual(materialized.count(), test.count) 
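# Hedged recap outside the table-driven loop: an unknown governor
# value fails fast with DataIdValueError rather than quietly
# returning an empty result set.
with self.assertRaises(DataIdValueError):
    do_query(["tract", "visit"], instrument="Cam2", skymap="SkyMap1").count()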

2823 

2824 def testQueryDimensionRecordsOrderBy(self): 

2825 """Test order_by and limit on result returned by 

2826 queryDimensionRecords(). 

2827 """ 

2828 registry = self.makeRegistry() 

2829 self.loadData(registry, "base.yaml") 

2830 self.loadData(registry, "datasets.yaml") 

2831 self.loadData(registry, "spatial.yaml") 

2832 

2833 def do_query(element, datasets=None, collections=None): 

2834 return registry.queryDimensionRecords( 

2835 element, instrument="Cam1", datasets=datasets, collections=collections 

2836 ) 

2837 

2838 query = do_query("detector") 

2839 self.assertEqual(len(list(query)), 4) 

2840 

2841 Test = namedtuple( 

2842 "testQueryDataIdsOrderByTest", 

2843 ("element", "order_by", "result", "limit", "datasets", "collections"), 

2844 defaults=(None, None, None), 

2845 ) 

2846 

2847 test_data = ( 

2848 Test("detector", "detector", (1, 2, 3, 4)), 

2849 Test("detector", "-detector", (4, 3, 2, 1)), 

2850 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)), 

2851 Test("detector", "-detector.purpose", (4,), limit=(1,)), 

2852 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)), 

2853 Test("visit", "visit", (1, 2)), 

2854 Test("visit", "-visit.id", (2, 1)), 

2855 Test("visit", "zenith_angle", (1, 2)), 

2856 Test("visit", "-visit.name", (2, 1)), 

2857 Test("visit", "day_obs,-timespan.begin", (2, 1)), 

2858 ) 

2859 

2860 for test in test_data: 

2861 order_by = test.order_by.split(",") 

2862 query = do_query(test.element).order_by(*order_by) 

2863 if test.limit is not None: 

2864 query = query.limit(*test.limit) 

2865 dataIds = tuple(rec.id for rec in query) 

2866 self.assertEqual(dataIds, test.result) 

2867 

2868 # errors in a name 

2869 for order_by in ("", "-"): 

2870 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2871 list(do_query("detector").order_by(order_by)) 

2872 

2873 for order_by in ("undimension.name", "-undimension.name"): 

2874 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"): 

2875 list(do_query("detector").order_by(order_by)) 

2876 

2877 for order_by in ("attract", "-attract"): 

2878 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."): 

2879 list(do_query("detector").order_by(order_by)) 

2880 

2881 def testQueryDimensionRecordsExceptions(self): 

2882 """Test exceptions raised by queryDimensionRecords().""" 

2883 registry = self.makeRegistry() 

2884 self.loadData(registry, "base.yaml") 

2885 self.loadData(registry, "datasets.yaml") 

2886 self.loadData(registry, "spatial.yaml") 

2887 

2888 result = registry.queryDimensionRecords("detector") 

2889 self.assertEqual(result.count(), 4) 

2890 result = registry.queryDimensionRecords("detector", instrument="Cam1") 

2891 self.assertEqual(result.count(), 4) 

2892 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"}) 

2893 self.assertEqual(result.count(), 4) 

2894 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'") 

2895 self.assertEqual(result.count(), 4) 

2896 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"}) 

2897 self.assertEqual(result.count(), 4) 

2898 

2899 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

2900 result = registry.queryDimensionRecords("detector", instrument="NotCam1") 

2901 result.count() 

2902 

2903 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

2904 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) 

2905 result.count() 

2906 

2907 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

2908 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") 

2909 result.count() 

2910 

2911 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

2912 result = registry.queryDimensionRecords( 

2913 "detector", where="instrument=instr", bind={"instr": "NotCam1"} 

2914 ) 

2915 result.count() 

2916 

2917 def testDatasetConstrainedDimensionRecordQueries(self): 

2918 """Test that queryDimensionRecords works even when given a dataset 

2919 constraint whose dimensions extend beyond the requested dimension 

2920 element's. 

2921 """ 

2922 registry = self.makeRegistry() 

2923 self.loadData(registry, "base.yaml") 

2924 self.loadData(registry, "datasets.yaml") 

2925 # Query for physical_filter dimension records, using a dataset that 

2926 # has both physical_filter and detector dimensions. 

2927 records = registry.queryDimensionRecords( 

2928 "physical_filter", 

2929 datasets=["flat"], 

2930 collections="imported_r", 

2931 ) 

2932 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"}) 

2933 # Trying to constrain by all dataset types is an error. 

2934 with self.assertRaises(TypeError): 

2935 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r")) 

2936 

2937 def testSkyPixDatasetQueries(self): 

2938 """Test that we can build queries involving skypix dimensions as long 

2939 as a dataset type that uses those dimensions is included. 

2940 """ 

2941 registry = self.makeRegistry() 

2942 self.loadData(registry, "base.yaml") 

2943 dataset_type = DatasetType( 

2944 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int" 

2945 ) 

2946 registry.registerDatasetType(dataset_type) 

2947 run = "r" 

2948 registry.registerRun(run) 

2949 # First try queries where there are no datasets; the concern is whether 

2950 # we can even build and execute these queries without raising, even 

2951 # when "doomed" query shortcuts are in play. 

2952 self.assertFalse( 

2953 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)) 

2954 ) 

2955 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run))) 

2956 # Now add a dataset and see that we can get it back. 

2957 htm7 = registry.dimensions.skypix["htm"][7].pixelization 

2958 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0]) 

2959 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run) 

2960 self.assertEqual( 

2961 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)), 

2962 {data_id}, 

2963 ) 

2964 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref}) 
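# Plumbing sketch for the skypix lookup above: universe() returns the
# pixelization's set of valid index ranges, and [0][0] picks the first
# index of the first range, exactly as used when inserting the dataset.
self.assertEqual(data_id["htm7"], htm7.universe()[0][0])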

2965 

2966 def testDatasetIdFactory(self): 

2967 """Simple test for DatasetIdFactory, mostly to catch potential changes 

2968 in its API. 

2969 """ 

2970 registry = self.makeRegistry() 

2971 factory = registry.datasetIdFactory 

2972 dataset_type = DatasetType( 

2973 "datasetType", 

2974 dimensions=["detector", "instrument"], 

2975 universe=registry.dimensions, 

2976 storageClass="int", 

2977 ) 

2978 run = "run" 

2979 data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions) 

2980 

2981 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE) 

2982 self.assertIsInstance(datasetId, uuid.UUID) 

2983 self.assertEqual(datasetId.version, 4) 

2984 

2985 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE) 

2986 self.assertIsInstance(datasetId, uuid.UUID) 

2987 self.assertEqual(datasetId.version, 5) 

2988 

2989 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN) 

2990 self.assertIsInstance(datasetId, uuid.UUID) 

2991 self.assertEqual(datasetId.version, 5)
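# Determinism sketch: DATAID_TYPE_RUN ids are name-based (version-5)
# UUIDs, so repeating the call with identical inputs should reproduce
# the same id, whereas UNIQUE ids are random version-4 values.
repeat = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
self.assertEqual(datasetId, repeat)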