Coverage for python/lsst/daf/butler/registry/tests/_registry.py: 4%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    Timespan,
    ddl,
)
from .._collection_summary import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class; if a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class; if a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config
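
    # For example, a concrete subclass might pin the registry to a particular
    # datasets manager implementation.  A minimal sketch (the subclass name
    # and fully-qualified module path below are illustrative assumptions, not
    # taken from this file):
    #
    #     class UUIDRegistryTests(RegistryTests, unittest.TestCase):
    #         datasetsManager = (
    #             "lsst.daf.butler.registry.datasets.byDimensions."
    #             "ByDimensionsDatasetRecordStorageManagerUUID"
    #         )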

    @abstractmethod
    def makeRegistry(self, share_repo_with: Optional[Registry] = None) -> Optional[Registry]:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with` is
            not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)
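
    # Typical usage in the tests below, with the YAML files shipped under
    # getDataDir():
    #
    #     registry = self.makeRegistry()
    #     self.loadData(registry, "base.yaml")
    #     self.loadData(registry, "datasets.yaml")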

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
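
    # Because the results objects are lazy (see the docstring above), count()
    # and any() may take different code paths from plain iteration, so the
    # helper above deliberately exercises all three rather than only
    # comparing list(results) against the expected values.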

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters.  SQLite says the limit is 32k, but it looks
        # like it is much higher in practice.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, the second has matching elements in different batches
        # (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))
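
    # The two _importDatasets tests below exercise the dataset ID generation
    # modes from DatasetIdGenEnum: a caller-provided UUID is kept as-is,
    # while DATAID_TYPE and DATAID_TYPE_RUN deterministically derive a
    # version-5 UUID from the dataset type and data ID (plus the run for
    # DATAID_TYPE_RUN), so re-importing the same dataset reproduces the same
    # ID.  A rough sketch of the idea (the namespace and serialization here
    # are illustrative assumptions, not the actual implementation):
    #
    #     import uuid
    #     NS = uuid.UUID("00000000-0000-0000-0000-000000000000")  # made up
    #
    #     def deterministic_id(datasetType, dataId, run=None):
    #         payload = datasetType.name + str(sorted(dataId.items()))
    #         if run is not None:
    #             payload += run
    #         return uuid.uuid5(NS, payload)  # a version-5 UUID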

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # The UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All the different failure modes
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test non-unique IDs; they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):

                # Use an integer dataset ID to force UUID calculation in
                # _import
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                (ref1,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to a different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)

    def testImportDatasetsInt(self):
        """Test for `Registry._importDatasets` with integer dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManager"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}
        dataset_id = 999999999

        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run=run)
        (ref1,) = registry._importDatasets([ref])
        # Should make a new integer ID.
        self.assertNotEqual(ref1.id, ref.id)

        # Ingesting the same dataId with a different dataset ID is an error
        ref2 = ref1.unresolved().resolved(dataset_id, run=run)
        with self.assertRaises(ConflictingDefinitionError):
            registry._importDatasets([ref2])

        # Ingesting a different dataId with the same dataset ID should work
        ref3 = DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

        ref3 = DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "flat", "bias.wcs", "flat.photoCalib"},
                NamedValueSet(registry.queryDatasetTypes(components=True)).names,
            )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
            )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
            )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
            )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={
                "data1": registry.storageClasses.getStorageClass("StructuredDataDict"),
                "data2": registry.storageClasses.getStorageClass("StructuredDataDict"),
            },
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertWarns(FutureWarning):
            with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
                everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        with self.assertWarns(FutureWarning):
            dataIds = registry.queryDataIds(
                ["detector"],
                datasets=["bias.wcs"],
                collections=collection,
            ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        with self.assertWarns(FutureWarning):
            childRefs2 = set(
                registry.queryDatasets(
                    "bias.wcs",
                    collections=collection,
                )
            )
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2}, {DatasetRef(childType, dataId) for dataId in dataIds}
        )
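
    # The next test builds up this collection structure (names taken from the
    # test body below):
    #
    #     chain2 (CHAINED) -> [run2, chain1]
    #     chain1 (CHAINED) -> [tag1, run2]
    #     tag1   (TAGGED)  -- subsets of run1/run2 datasets, associated by ref
    #     run1, run2 (RUN) -- "imported_g" and "imported_r"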

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  It should be found via chain2
        # as well, since run2 is the first element of that chain.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)
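
    # The next test populates this layout (per its loops below): exposures
    # 100/101 map to visit 10, 110/111 to visit 11, and 200/201 to visit 20;
    # run1 holds raws for exposures 100-111 on detectors 1-3, run2 adds a
    # second dataset for exposure 100 plus exposures 200-201 on detectors
    # 3-5, and tagged2 tags the datasets for exposures 100, 101, 200, 201.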

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the different collections;
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dim string works as well as a list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions),
            )
            self.assertEqual(
                packer2.unpack(packer2.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer2.dimensions),
            )
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input collections
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to a single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, which is not in the dimensions, but
        # is a part of the full expression, so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8")))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to a single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # Specifying a non-existent skymap is an exception
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()

1203 def testSpatialJoin(self): 

1204 """Test queries that involve spatial overlap joins.""" 

1205 registry = self.makeRegistry() 

1206 self.loadData(registry, "hsc-rc2-subset.yaml") 

1207 

1208 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of 

1209 # the TopologicalFamily they belong to. We'll relate all elements in 

1210 # each family to all of the elements in each other family. 

1211 families = defaultdict(set) 

1212 # Dictionary of {element.name: {dataId: region}}. 

1213 regions = {} 

1214 for element in registry.dimensions.getDatabaseElements(): 

1215 if element.spatial is not None: 

1216 families[element.spatial.name].add(element) 

1217 regions[element.name] = { 

1218 record.dataId: record.region for record in registry.queryDimensionRecords(element) 

1219 } 

1220 

1221 # If this check fails, it's not necessarily a problem - it may just be 

1222 # a reasonable change to the default dimension definitions - but the 

1223 # test below depends on there being more than one family to do anything 

1224 # useful. 

1225 self.assertEqual(len(families), 2) 

1226 

1227 # Overlap DatabaseDimensionElements with each other. 

1228 for family1, family2 in itertools.combinations(families, 2): 

1229 for element1, element2 in itertools.product(families[family1], families[family2]): 

1230 graph = DimensionGraph.union(element1.graph, element2.graph) 

1231 # Construct expected set of overlapping data IDs via a 

1232 # brute-force comparison of the regions we've already fetched. 

1233 expected = { 

1234 DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph) 

1235 for (dataId1, region1), (dataId2, region2) in itertools.product( 

1236 regions[element1.name].items(), regions[element2.name].items() 

1237 ) 

1238 if not region1.isDisjointFrom(region2) 

1239 } 

1240 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1241 queried = set(registry.queryDataIds(graph)) 

1242 self.assertEqual(expected, queried) 

1243 

1244 # Overlap each DatabaseDimensionElement with the commonSkyPix system. 

1245 commonSkyPix = registry.dimensions.commonSkyPix 

1246 for elementName, element_regions in regions.items(): 

1247 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph) 

1248 expected = set() 

1249 for dataId, region in element_regions.items(): 

1250 for begin, end in commonSkyPix.pixelization.envelope(region): 

1251 expected.update( 

1252 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph) 

1253 for index in range(begin, end) 

1254 ) 

1255 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1256 queried = set(registry.queryDataIds(graph)) 

1257 self.assertEqual(expected, queried) 
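# (For reference: Pixelization.envelope returns a RangeSet of half-open
# [begin, end) index ranges covering the region, which is why the
# expected set above is built with range(begin, end).)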

1258 

1259 def testAbstractQuery(self): 

1260 """Test that we can run a query that just lists the known 

1261 bands. This is tricky because band is 

1262 backed by a query against physical_filter. 

1263 """ 

1264 registry = self.makeRegistry() 

1265 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1266 registry.insertDimensionData( 

1267 "physical_filter", 

1268 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1269 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1270 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1271 ) 

1272 rows = registry.queryDataIds(["band"]).toSet() 

1273 self.assertCountEqual( 

1274 rows, 

1275 [ 

1276 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1277 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1278 ], 

1279 ) 

1280 

1281 def testAttributeManager(self): 

1282 """Test basic functionality of attribute manager.""" 

1283 # number of attributes with schema versions in a fresh database, 

1284 # 6 managers with 3 records per manager, plus config for dimensions 

1285 VERSION_COUNT = 6 * 3 + 1 

1286 

1287 registry = self.makeRegistry() 

1288 attributes = registry._managers.attributes 

1289 

1290 # check what get() returns for non-existing key 

1291 self.assertIsNone(attributes.get("attr")) 

1292 self.assertEqual(attributes.get("attr", ""), "") 

1293 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1294 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1295 

1296 # cannot store empty key or value 

1297 with self.assertRaises(ValueError): 

1298 attributes.set("", "value") 

1299 with self.assertRaises(ValueError): 

1300 attributes.set("attr", "") 

1301 

1302 # set value of non-existing key 

1303 attributes.set("attr", "value") 

1304 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1305 self.assertEqual(attributes.get("attr"), "value") 

1306 

1307 # update value of existing key 

1308 with self.assertRaises(ButlerAttributeExistsError): 

1309 attributes.set("attr", "value2") 

1310 

1311 attributes.set("attr", "value2", force=True) 

1312 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1313 self.assertEqual(attributes.get("attr"), "value2") 

1314 

1315 # delete existing key 

1316 self.assertTrue(attributes.delete("attr")) 

1317 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1318 

1319 # delete non-existing key 

1320 self.assertFalse(attributes.delete("non-attr")) 

1321 

1322 # store a bunch of keys and get the list back 

1323 data = [ 

1324 ("version.core", "1.2.3"), 

1325 ("version.dimensions", "3.2.1"), 

1326 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1327 ] 

1328 for key, value in data: 

1329 attributes.set(key, value) 

1330 items = dict(attributes.items()) 

1331 for key, value in data: 

1332 self.assertEqual(items[key], value) 

1333 

1334 def testQueryDatasetsDeduplication(self): 

1335 """Test that the findFirst option to queryDatasets selects datasets 

1336 from collections in the order given. 

1337 """ 

1338 registry = self.makeRegistry() 

1339 self.loadData(registry, "base.yaml") 

1340 self.loadData(registry, "datasets.yaml") 

1341 self.assertCountEqual( 

1342 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1343 [ 

1344 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1345 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1346 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1347 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1348 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1349 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1350 ], 

1351 ) 

1352 self.assertCountEqual( 

1353 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1354 [ 

1355 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1356 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1357 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1358 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1359 ], 

1360 ) 

1361 self.assertCountEqual( 

1362 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1363 [ 

1364 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1365 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1366 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1367 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1368 ], 

1369 ) 

1370 

1371 def testQueryResults(self): 

1372 """Test querying for data IDs and then manipulating the QueryResults 

1373 object returned to perform other queries. 

1374 """ 

1375 registry = self.makeRegistry() 

1376 self.loadData(registry, "base.yaml") 

1377 self.loadData(registry, "datasets.yaml") 

1378 bias = registry.getDatasetType("bias") 

1379 flat = registry.getDatasetType("flat") 

1380 # Obtain expected results from methods other than those we're testing 

1381 # here. That includes: 

1382 # - the dimensions of the data IDs we want to query: 

1383 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1384 # - the dimensions of some other data IDs we'll extract from that: 

1385 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1386 # - the data IDs we expect to obtain from the first queries: 

1387 expectedDataIds = DataCoordinateSet( 

1388 { 

1389 DataCoordinate.standardize( 

1390 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1391 ) 

1392 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1393 }, 

1394 graph=expectedGraph, 

1395 hasFull=False, 

1396 hasRecords=False, 

1397 ) 

1398 # - the flat datasets we expect to find from those data IDs, in just 

1399 # one collection (so deduplication is irrelevant): 

1400 expectedFlats = [ 

1401 registry.findDataset( 

1402 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1403 ), 

1404 registry.findDataset( 

1405 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1406 ), 

1407 registry.findDataset( 

1408 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1409 ), 

1410 ] 

1411 # - the data IDs we expect to extract from that: 

1412 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1413 # - the bias datasets we expect to find from those data IDs, after we 

1414 # subset out the physical_filter dimension, first with duplicates: 

1415 expectedAllBiases = [ 

1416 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1417 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1418 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1419 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1420 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1421 ] 

1422 # - ...and without duplicates: 

1423 expectedDeduplicatedBiases = [ 

1424 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1425 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1426 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1427 ] 

1428 # Test against those expected results, using a "lazy" query for the 

1429 # data IDs (which re-executes that query each time we use it to do 

1430 # something new). 

1431 dataIds = registry.queryDataIds( 

1432 ["detector", "physical_filter"], 

1433 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1434 instrument="Cam1", 

1435 ) 

1436 self.assertEqual(dataIds.graph, expectedGraph) 

1437 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1438 self.assertCountEqual( 

1439 list( 

1440 dataIds.findDatasets( 

1441 flat, 

1442 collections=["imported_r"], 

1443 ) 

1444 ), 

1445 expectedFlats, 

1446 ) 

1447 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1448 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1449 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1450 self.assertCountEqual( 

1451 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1452 expectedAllBiases, 

1453 ) 

1454 self.assertCountEqual( 

1455 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1456 expectedDeduplicatedBiases, 

1457 ) 

1458 

1459 # Check that a dataset type whose dimensions don't match is rejected. 

1460 with self.assertRaises(ValueError): 

1461 subsetDataIds.findDatasets("flat", collections=["imported_r", "imported_g"], findFirst=True) 

1462 

1463 # Use a component dataset type. 

1464 self.assertCountEqual( 

1465 [ 

1466 ref.makeComponentRef("image") 

1467 for ref in subsetDataIds.findDatasets( 

1468 bias, 

1469 collections=["imported_r", "imported_g"], 

1470 findFirst=False, 

1471 ) 

1472 ], 

1473 [ref.makeComponentRef("image") for ref in expectedAllBiases], 

1474 ) 

1475 

1476 # Use a named dataset type that does not exist and a dataset type 

1477 # object that does not exist. 

1478 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure") 

1479 

1480 # Test both string name and dataset type object. 

1481 test_type: Union[str, DatasetType] 

1482 for test_type, test_type_name in ( 

1483 (unknown_type, unknown_type.name), 

1484 (unknown_type.name, unknown_type.name), 

1485 ): 

1486 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name): 

1487 list( 

1488 subsetDataIds.findDatasets( 

1489 test_type, collections=["imported_r", "imported_g"], findFirst=True 

1490 ) 

1491 ) 

1492 

1493 # Materialize the bias dataset queries (only) by putting the results 

1494 # into temporary tables, then repeat those tests. 

1495 with subsetDataIds.findDatasets( 

1496 bias, collections=["imported_r", "imported_g"], findFirst=False 

1497 ).materialize() as biases: 

1498 self.assertCountEqual(list(biases), expectedAllBiases) 

1499 with subsetDataIds.findDatasets( 

1500 bias, collections=["imported_r", "imported_g"], findFirst=True 

1501 ).materialize() as biases: 

1502 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1503 # Materialize the data ID subset query, but not the dataset queries. 

1504 with subsetDataIds.materialize() as subsetDataIds: 

1505 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1506 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1507 self.assertCountEqual( 

1508 list( 

1509 subsetDataIds.findDatasets( 

1510 bias, collections=["imported_r", "imported_g"], findFirst=False 

1511 ) 

1512 ), 

1513 expectedAllBiases, 

1514 ) 

1515 self.assertCountEqual( 

1516 list( 

1517 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1518 ), 

1519 expectedDeduplicatedBiases, 

1520 ) 

1521 # Materialize the dataset queries, too. 

1522 with subsetDataIds.findDatasets( 

1523 bias, collections=["imported_r", "imported_g"], findFirst=False 

1524 ).materialize() as biases: 

1525 self.assertCountEqual(list(biases), expectedAllBiases) 

1526 with subsetDataIds.findDatasets( 

1527 bias, collections=["imported_r", "imported_g"], findFirst=True 

1528 ).materialize() as biases: 

1529 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1530 # Materialize the original query, but none of the follow-up queries. 

1531 with dataIds.materialize() as dataIds: 

1532 self.assertEqual(dataIds.graph, expectedGraph) 

1533 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1534 self.assertCountEqual( 

1535 list( 

1536 dataIds.findDatasets( 

1537 flat, 

1538 collections=["imported_r"], 

1539 ) 

1540 ), 

1541 expectedFlats, 

1542 ) 

1543 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1544 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1545 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1546 self.assertCountEqual( 

1547 list( 

1548 subsetDataIds.findDatasets( 

1549 bias, collections=["imported_r", "imported_g"], findFirst=False 

1550 ) 

1551 ), 

1552 expectedAllBiases, 

1553 ) 

1554 self.assertCountEqual( 

1555 list( 

1556 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1557 ), 

1558 expectedDeduplicatedBiases, 

1559 ) 

1560 # Materialize just the bias dataset queries. 

1561 with subsetDataIds.findDatasets( 

1562 bias, collections=["imported_r", "imported_g"], findFirst=False 

1563 ).materialize() as biases: 

1564 self.assertCountEqual(list(biases), expectedAllBiases) 

1565 with subsetDataIds.findDatasets( 

1566 bias, collections=["imported_r", "imported_g"], findFirst=True 

1567 ).materialize() as biases: 

1568 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1569 # Materialize the subset data ID query, but not the dataset 

1570 # queries. 

1571 with subsetDataIds.materialize() as subsetDataIds: 

1572 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1573 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1574 self.assertCountEqual( 

1575 list( 

1576 subsetDataIds.findDatasets( 

1577 bias, collections=["imported_r", "imported_g"], findFirst=False 

1578 ) 

1579 ), 

1580 expectedAllBiases, 

1581 ) 

1582 self.assertCountEqual( 

1583 list( 

1584 subsetDataIds.findDatasets( 

1585 bias, collections=["imported_r", "imported_g"], findFirst=True 

1586 ) 

1587 ), 

1588 expectedDeduplicatedBiases, 

1589 ) 

1590 # Materialize the bias dataset queries, too, so now we're 

1591 # materializing every single step. 

1592 with subsetDataIds.findDatasets( 

1593 bias, collections=["imported_r", "imported_g"], findFirst=False 

1594 ).materialize() as biases: 

1595 self.assertCountEqual(list(biases), expectedAllBiases) 

1596 with subsetDataIds.findDatasets( 

1597 bias, collections=["imported_r", "imported_g"], findFirst=True 

1598 ).materialize() as biases: 

1599 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1600 

1601 def testEmptyDimensionsQueries(self): 

1602 """Test Query and QueryResults objects in the case where there are no 

1603 dimensions. 

1604 """ 

1605 # Set up test data: one dataset type, two runs, one dataset in each. 

1606 registry = self.makeRegistry() 

1607 self.loadData(registry, "base.yaml") 

1608 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1609 registry.registerDatasetType(schema) 

1610 dataId = DataCoordinate.makeEmpty(registry.dimensions) 

1611 run1 = "run1" 

1612 run2 = "run2" 

1613 registry.registerRun(run1) 

1614 registry.registerRun(run2) 

1615 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1616 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1617 # Query directly for both of the datasets, then for each one at a time. 

1618 self.checkQueryResults( 

1619 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1620 ) 

1621 self.checkQueryResults( 

1622 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1623 [dataset1], 

1624 ) 

1625 self.checkQueryResults( 

1626 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1627 [dataset2], 

1628 ) 

1629 # Query for data IDs with no dimensions. 

1630 dataIds = registry.queryDataIds([]) 

1631 self.checkQueryResults(dataIds, [dataId]) 

1632 # Use queried data IDs to find the datasets. 

1633 self.checkQueryResults( 

1634 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1635 [dataset1, dataset2], 

1636 ) 

1637 self.checkQueryResults( 

1638 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1639 [dataset1], 

1640 ) 

1641 self.checkQueryResults( 

1642 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1643 [dataset2], 

1644 ) 

1645 # Now materialize the data ID query results and repeat those tests. 

1646 with dataIds.materialize() as dataIds: 

1647 self.checkQueryResults(dataIds, [dataId]) 

1648 self.checkQueryResults( 

1649 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1650 [dataset1], 

1651 ) 

1652 self.checkQueryResults( 

1653 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1654 [dataset2], 

1655 ) 

1656 # Query for non-empty data IDs, then subset that to get the empty one. 

1657 # Repeat the above tests starting from that. 

1658 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1659 self.checkQueryResults(dataIds, [dataId]) 

1660 self.checkQueryResults( 

1661 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1662 [dataset1, dataset2], 

1663 ) 

1664 self.checkQueryResults( 

1665 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1666 [dataset1], 

1667 ) 

1668 self.checkQueryResults( 

1669 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1670 [dataset2], 

1671 ) 

1672 with dataIds.materialize() as dataIds: 

1673 self.checkQueryResults(dataIds, [dataId]) 

1674 self.checkQueryResults( 

1675 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1676 [dataset1, dataset2], 

1677 ) 

1678 self.checkQueryResults( 

1679 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1680 [dataset1], 

1681 ) 

1682 self.checkQueryResults( 

1683 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1684 [dataset2], 

1685 ) 

1686 # Query for non-empty data IDs, then materialize, then subset to get 

1687 # the empty one. Repeat again. 

1688 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1689 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1690 self.checkQueryResults(dataIds, [dataId]) 

1691 self.checkQueryResults( 

1692 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1693 [dataset1, dataset2], 

1694 ) 

1695 self.checkQueryResults( 

1696 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1697 [dataset1], 

1698 ) 

1699 self.checkQueryResults( 

1700 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1701 [dataset2], 

1702 ) 

1703 with dataIds.materialize() as dataIds: 

1704 self.checkQueryResults(dataIds, [dataId]) 

1705 self.checkQueryResults( 

1706 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1707 [dataset1, dataset2], 

1708 ) 

1709 self.checkQueryResults( 

1710 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1711 [dataset1], 

1712 ) 

1713 self.checkQueryResults( 

1714 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1715 [dataset2], 

1716 ) 

1717 # Query for non-empty data IDs with a constraint on an empty-data-ID 

1718 # dataset that exists. 

1719 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...) 

1720 self.checkQueryResults( 

1721 dataIds.subset(unique=True), 

1722 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)], 

1723 ) 

1724 # Again query for non-empty data IDs with a constraint on empty-data-ID 

1725 # datasets, but when the datasets don't exist. We delete the existing 

1726 # dataset and query just that collection rather than creating a new 

1727 # empty collection, because that is less likely to be short-circuited 

1728 # by our query-building logic (via the collection summaries), and such 

1729 # a shortcut would make this test more trivial than we'd like. 

1730 registry.removeDatasets([dataset2]) 

1731 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2) 

1732 self.checkQueryResults(dataIds, []) 

1733 

1734 def testDimensionDataModifications(self): 

1735 """Test that modifying dimension records via: 

1736 syncDimensionData(..., update=True) and 

1737 insertDimensionData(..., replace=True) works as expected, even in the 

1738 presence of datasets using those dimensions and spatial overlap 

1739 relationships. 

1740 """ 

1741 

1742 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1743 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1744 for begin, end in ranges: 

1745 yield from range(begin, end) 

1746 

1747 def range_set_hull( 

1748 ranges: lsst.sphgeom.RangeSet, 

1749 pixelization: lsst.sphgeom.HtmPixelization, 

1750 ) -> lsst.sphgeom.ConvexPolygon: 

1751 """Create a ConvexPolygon hull of the region defined by a set of 

1752 HTM pixelization index ranges. 

1753 """ 

1754 points = [] 

1755 for index in unpack_range_set(ranges): 

1756 points.extend(pixelization.triangle(index).getVertices()) 

1757 return lsst.sphgeom.ConvexPolygon(points) 

1758 

1759 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1760 # and four child regions (the trixels within the parent at the next 

1761 # level). We'll use the parent as a tract/visit region and the children 

1762 # as its patch/visit_detector regions. 

1763 registry = self.makeRegistry() 

1764 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1765 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1766 index = 12288 

1767 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 
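# (An HTM trixel with index i at one level has children 4*i .. 4*i + 3
# at the next level, so scaling the one-trixel RangeSet by 4 yields
# exactly the four child trixels.)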

1768 assert htm6.universe().contains(child_ranges_small) 

1769 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)] 

1770 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1771 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1772 ) 

1773 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1774 # Make a larger version of each child region, defined to be the set of 

1775 # htm6 trixels that overlap the original's bounding circle. Make a new 

1776 # parent that's the convex hull of the new children. 

1777 child_regions_large = [ 

1778 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1779 ] 

1780 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small)) 

1781 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1782 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1783 ) 

1784 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1785 assert parent_region_large.contains(parent_region_small) 

1786 assert not parent_region_small.contains(parent_region_large) 

1787 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1788 # Find some commonSkyPix indices that overlap the large regions but do 

1789 # not overlap the small regions. We use commonSkyPix here to make sure the 

1790 # real tests later involve what's in the database, not just post-query 

1791 # region filtering. 

1792 child_difference_indices = [] 

1793 for large, small in zip(child_regions_large, child_regions_small): 

1794 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1795 assert difference, "if this is empty, we can't test anything useful with these regions" 

1796 assert all( 

1797 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1798 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1799 for d in difference 

1800 ) 

1801 child_difference_indices.append(difference) 

1802 parent_difference_indices = list( 

1803 unpack_range_set( 

1804 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1805 ) 

1806 ) 

1807 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1808 assert all( 

1809 ( 

1810 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1811 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1812 ) 

1813 for d in parent_difference_indices 

1814 ) 

1815 # Now that we've finally got those regions, we'll insert the large ones 

1816 # as tract/patch dimension records. 

1817 skymap_name = "testing_v1" 

1818 registry.insertDimensionData( 

1819 "skymap", 

1820 { 

1821 "name": skymap_name, 

1822 "hash": bytes([42]), 

1823 "tract_max": 1, 

1824 "patch_nx_max": 2, 

1825 "patch_ny_max": 2, 

1826 }, 

1827 ) 

1828 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

1829 registry.insertDimensionData( 

1830 "patch", 

1831 *[ 

1832 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1833 for n, c in enumerate(child_regions_large) 

1834 ], 

1835 ) 

1836 # Add a dataset that uses these dimensions to make sure that modifying 

1837 # them doesn't disrupt foreign keys (need to make sure DB doesn't 

1838 # implement insert with replace=True as delete-then-insert). 

1839 dataset_type = DatasetType( 

1840 "coadd", 

1841 dimensions=["tract", "patch"], 

1842 universe=registry.dimensions, 

1843 storageClass="Exposure", 

1844 ) 

1845 registry.registerDatasetType(dataset_type) 

1846 registry.registerCollection("the_run", CollectionType.RUN) 

1847 registry.insertDatasets( 

1848 dataset_type, 

1849 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

1850 run="the_run", 

1851 ) 

1852 # Query for tracts and patches that overlap some "difference" commonSkyPix 

1853 # pixels; there should be overlaps, because the database has 

1854 # the "large" suite of regions. 

1855 self.assertEqual( 

1856 {0}, 

1857 { 

1858 data_id["tract"] 

1859 for data_id in registry.queryDataIds( 

1860 ["tract"], 

1861 skymap=skymap_name, 

1862 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1863 ) 

1864 }, 

1865 ) 

1866 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1867 self.assertIn( 

1868 patch_id, 

1869 { 

1870 data_id["patch"] 

1871 for data_id in registry.queryDataIds( 

1872 ["patch"], 

1873 skymap=skymap_name, 

1874 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1875 ) 

1876 }, 

1877 ) 

1878 # Use sync to update the tract region and insert to update the patch 

1879 # regions, to the "small" suite. 

1880 updated = registry.syncDimensionData( 

1881 "tract", 

1882 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

1883 update=True, 

1884 ) 

1885 self.assertEqual(updated, {"region": parent_region_large}) 

1886 registry.insertDimensionData( 

1887 "patch", 

1888 *[ 

1889 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1890 for n, c in enumerate(child_regions_small) 

1891 ], 

1892 replace=True, 

1893 ) 

1894 # Query again; there now should be no such overlaps, because the 

1895 # database has the "small" suite of regions. 

1896 self.assertFalse( 

1897 set( 

1898 registry.queryDataIds( 

1899 ["tract"], 

1900 skymap=skymap_name, 

1901 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1902 ) 

1903 ) 

1904 ) 

1905 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1906 self.assertNotIn( 

1907 patch_id, 

1908 { 

1909 data_id["patch"] 

1910 for data_id in registry.queryDataIds( 

1911 ["patch"], 

1912 skymap=skymap_name, 

1913 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1914 ) 

1915 }, 

1916 ) 

1917 # Update back to the large regions and query one more time. 

1918 updated = registry.syncDimensionData( 

1919 "tract", 

1920 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

1921 update=True, 

1922 ) 

1923 self.assertEqual(updated, {"region": parent_region_small}) 

1924 registry.insertDimensionData( 

1925 "patch", 

1926 *[ 

1927 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1928 for n, c in enumerate(child_regions_large) 

1929 ], 

1930 replace=True, 

1931 ) 

1932 self.assertEqual( 

1933 {0}, 

1934 { 

1935 data_id["tract"] 

1936 for data_id in registry.queryDataIds( 

1937 ["tract"], 

1938 skymap=skymap_name, 

1939 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1940 ) 

1941 }, 

1942 ) 

1943 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1944 self.assertIn( 

1945 patch_id, 

1946 { 

1947 data_id["patch"] 

1948 for data_id in registry.queryDataIds( 

1949 ["patch"], 

1950 skymap=skymap_name, 

1951 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1952 ) 

1953 }, 

1954 ) 

1955 

1956 def testCalibrationCollections(self): 

1957 """Test operations on `~CollectionType.CALIBRATION` collections, 

1958 including `Registry.certify`, `Registry.decertify`, and 

1959 `Registry.findDataset`. 

1960 """ 

1961 # Setup - make a Registry, fill it with some datasets in 

1962 # non-calibration collections. 

1963 registry = self.makeRegistry() 

1964 self.loadData(registry, "base.yaml") 

1965 self.loadData(registry, "datasets.yaml") 

1966 # Set up some timestamps. 

1967 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

1968 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

1969 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

1970 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

1971 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

1972 allTimespans = [ 

1973 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

1974 ] 
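# (Because None appears at both ends of the input list, the
# combinations include half-unbounded timespans like Timespan(None, t3)
# and Timespan(t3, None), as well as the fully unbounded
# Timespan(None, None).)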

1975 # Get references to some datasets. 

1976 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

1977 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

1978 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

1979 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

1980 # Register the main calibration collection we'll be working with. 

1981 collection = "Cam1/calibs/default" 

1982 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

1983 # Cannot associate into a calibration collection (no timespan). 

1984 with self.assertRaises(CollectionTypeError): 

1985 registry.associate(collection, [bias2a]) 

1986 # Certify 2a dataset with [t2, t4) validity. 

1987 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

1988 # Test that we can query for this dataset via the new collection, both 

1989 # on its own and with a RUN collection, as long as we don't try to join 

1990 # in temporal dimensions or use findFirst=True. 

1991 self.assertEqual( 

1992 set(registry.queryDatasets("bias", findFirst=False, collections=collection)), 

1993 {bias2a}, 

1994 ) 

1995 self.assertEqual( 

1996 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])), 

1997 { 

1998 bias2a, 

1999 bias2b, 

2000 bias3b, 

2001 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2002 }, 

2003 ) 

2004 self.assertEqual( 

2005 set(registry.queryDataIds("detector", datasets="bias", collections=collection)), 

2006 {registry.expandDataId(instrument="Cam1", detector=2)}, 

2007 ) 

2008 self.assertEqual( 

2009 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])), 

2010 { 

2011 registry.expandDataId(instrument="Cam1", detector=2), 

2012 registry.expandDataId(instrument="Cam1", detector=3), 

2013 registry.expandDataId(instrument="Cam1", detector=4), 

2014 }, 

2015 ) 

2016 

2017 # We should not be able to certify 2b with anything overlapping that 

2018 # window. 

2019 with self.assertRaises(ConflictingDefinitionError): 

2020 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

2021 with self.assertRaises(ConflictingDefinitionError): 

2022 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

2023 with self.assertRaises(ConflictingDefinitionError): 

2024 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

2025 with self.assertRaises(ConflictingDefinitionError): 

2026 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

2027 with self.assertRaises(ConflictingDefinitionError): 

2028 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

2029 with self.assertRaises(ConflictingDefinitionError): 

2030 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

2031 with self.assertRaises(ConflictingDefinitionError): 

2032 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

2033 with self.assertRaises(ConflictingDefinitionError): 

2034 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

2035 # We should be able to certify 3a with a range overlapping that window, 

2036 # because it's for a different detector. 

2037 # We'll certify 3a over [t1, t3). 

2038 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

2039 # Now we'll certify 2b and 3b together over [t4, ∞). 

2040 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

2041 

2042 # Fetch all associations and check that they are what we expect. 

2043 self.assertCountEqual( 

2044 list( 

2045 registry.queryDatasetAssociations( 

2046 "bias", 

2047 collections=[collection, "imported_g", "imported_r"], 

2048 ) 

2049 ), 

2050 [ 

2051 DatasetAssociation( 

2052 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

2053 collection="imported_g", 

2054 timespan=None, 

2055 ), 

2056 DatasetAssociation( 

2057 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

2058 collection="imported_r", 

2059 timespan=None, 

2060 ), 

2061 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

2062 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

2063 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

2064 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

2065 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

2066 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

2067 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2068 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

2069 ], 

2070 ) 

2071 

2072 class Ambiguous: 

2073 """Tag class to denote lookups that should be ambiguous.""" 

2074 

2075 pass 

2076 

2077 def assertLookup( 

2078 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]] 

2079 ) -> None: 

2080 """Local function that asserts that a bias lookup returns the given 

2081 expected result. 

2082 """ 

2083 if expected is Ambiguous: 

2084 with self.assertRaises(RuntimeError): 

2085 registry.findDataset( 

2086 "bias", 

2087 collections=collection, 

2088 instrument="Cam1", 

2089 detector=detector, 

2090 timespan=timespan, 

2091 ) 

2092 else: 

2093 self.assertEqual( 

2094 expected, 

2095 registry.findDataset( 

2096 "bias", 

2097 collections=collection, 

2098 instrument="Cam1", 

2099 detector=detector, 

2100 timespan=timespan, 

2101 ), 

2102 ) 

2103 

2104 # Systematically test lookups against expected results. 

2105 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2106 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2107 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2108 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2109 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

2110 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2111 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2112 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2113 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2114 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

2115 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2116 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2117 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2118 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

2119 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2120 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

2121 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

2122 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

2123 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

2124 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2125 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2126 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2127 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2128 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2129 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2130 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

2131 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2132 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2133 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2134 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2135 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

2136 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2137 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2138 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2139 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2140 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2141 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2142 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2143 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2144 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2145 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2146 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2147 

2148 # Decertify [t3, t5) for all data IDs, and do the test lookups again. 

2149 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2150 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2151 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2152 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2153 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2154 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2155 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2156 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2157 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2158 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2159 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2160 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2161 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2162 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2163 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2164 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2165 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2166 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2167 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2168 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2169 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2170 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2171 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2172 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2173 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2174 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2175 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2176 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2177 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2178 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2179 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2180 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2181 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2182 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2183 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2184 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2185 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2186 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2187 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2188 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2189 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2190 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2191 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2192 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2193 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2194 

2195 # Decertify everything, this time with explicit data IDs, then check 

2196 # that no lookups succeed. 

2197 registry.decertify( 

2198 collection, 

2199 "bias", 

2200 Timespan(None, None), 

2201 dataIds=[ 

2202 dict(instrument="Cam1", detector=2), 

2203 dict(instrument="Cam1", detector=3), 

2204 ], 

2205 ) 

2206 for detector in (2, 3): 

2207 for timespan in allTimespans: 

2208 assertLookup(detector=detector, timespan=timespan, expected=None) 

2209 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2210 # those. 

2211 registry.certify( 

2212 collection, 

2213 [bias2a, bias3a], 

2214 Timespan(None, None), 

2215 ) 

2216 for timespan in allTimespans: 

2217 assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2218 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2219 # Decertify just bias2 over [t2, t4). 

2220 # This should split a single certification row into two (and leave the 

2221 # other existing row, for bias3a, alone). 

2222 registry.decertify( 

2223 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2224 ) 

2225 for timespan in allTimespans: 

2226 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2227 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2228 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2229 if overlapsBefore and overlapsAfter: 

2230 expected = Ambiguous 

2231 elif overlapsBefore or overlapsAfter: 

2232 expected = bias2a 

2233 else: 

2234 expected = None 

2235 assertLookup(detector=2, timespan=timespan, expected=expected) 

2236 

2237 def testSkipCalibs(self): 

2238 """Test how queries handle skipping of calibration collections.""" 

2239 registry = self.makeRegistry() 

2240 self.loadData(registry, "base.yaml") 

2241 self.loadData(registry, "datasets.yaml") 

2242 

2243 coll_calib = "Cam1/calibs/default" 

2244 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2245 

2246 # Add all biases to the calibration collection. 

2247 # Without this, the logic that prunes dataset subqueries based on 

2248 # datasetType-collection summary information will fire before the logic 

2249 # we want to test below. This is a good thing (it avoids the dreaded 

2250 # NotImplementedError a bit more often) everywhere but here. 

2251 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2252 

2253 coll_list = [coll_calib, "imported_g", "imported_r"] 

2254 chain = "Cam1/chain" 

2255 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2256 registry.setCollectionChain(chain, coll_list) 

2257 

2258 # explicit list will raise if findFirst=True or there are temporal 

2259 # dimensions 

2260 with self.assertRaises(NotImplementedError): 

2261 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2262 with self.assertRaises(NotImplementedError): 

2263 registry.queryDataIds( 

2264 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2265 ).count() 
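# (A find-first search or a temporal-dimension join against a
# CALIBRATION collection would need timespan-aware lookups that these
# query paths do not implement, hence the NotImplementedError.)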

2266 

2267 # chain will skip 

2268 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2269 self.assertGreater(len(datasets), 0) 

2270 

2271 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2272 self.assertGreater(len(dataIds), 0) 

2273 

2274 # glob will skip too 

2275 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2276 self.assertGreater(len(datasets), 0) 

2277 

2278 # regular expression will skip too 

2279 pattern = re.compile(".*") 

2280 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2281 self.assertGreater(len(datasets), 0) 

2282 

2283 # ellipsis should work as usual 

2284 datasets = list(registry.queryDatasets("bias", collections=...)) 

2285 self.assertGreater(len(datasets), 0) 

2286 

2287 # few tests with findFirst 

2288 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2289 self.assertGreater(len(datasets), 0) 

2290 

2291 def testIngestTimeQuery(self): 
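"""Test ingest_date comparisons in query where-clause expressions."""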

2292 

2293 registry = self.makeRegistry() 

2294 self.loadData(registry, "base.yaml") 

2295 dt0 = datetime.utcnow() 

2296 self.loadData(registry, "datasets.yaml") 

2297 dt1 = datetime.utcnow() 

2298 

2299 datasets = list(registry.queryDatasets(..., collections=...)) 

2300 len0 = len(datasets) 

2301 self.assertGreater(len0, 0) 

2302 

2303 where = "ingest_date > T'2000-01-01'" 

2304 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2305 len1 = len(datasets) 

2306 self.assertEqual(len0, len1) 

2307 

2308 # no one will ever use this piece of software in 30 years 

2309 where = "ingest_date > T'2050-01-01'" 

2310 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2311 len2 = len(datasets) 

2312 self.assertEqual(len2, 0) 

2313 

2314 # Check more exact timing to make sure there is no 37-second offset 

2315 # (after fixing DM-30124). SQLite time precision is 1 second, make 

2316 # sure that we don't test with higher precision. 

2317 tests = [ 

2318 # format: (timestamp, operator, expected_len) 

2319 (dt0 - timedelta(seconds=1), ">", len0), 

2320 (dt0 - timedelta(seconds=1), "<", 0), 

2321 (dt1 + timedelta(seconds=1), "<", len0), 

2322 (dt1 + timedelta(seconds=1), ">", 0), 

2323 ] 

2324 for dt, op, expect_len in tests: 

2325 dt_str = dt.isoformat(sep=" ") 

2326 

2327 where = f"ingest_date {op} T'{dt_str}'" 

2328 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2329 self.assertEqual(len(datasets), expect_len) 

2330 

2331 # same with bind using datetime or astropy Time 

2332 where = f"ingest_date {op} ingest_time" 

2333 datasets = list( 

2334 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2335 ) 

2336 self.assertEqual(len(datasets), expect_len) 

2337 

2338 dt_astropy = astropy.time.Time(dt, format="datetime") 

2339 datasets = list( 

2340 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2341 ) 

2342 self.assertEqual(len(datasets), expect_len) 

2343 

2344 def testTimespanQueries(self): 

2345 """Test query expressions involving timespans.""" 

2346 registry = self.makeRegistry() 

2347 self.loadData(registry, "hsc-rc2-subset.yaml") 

2348 # All visits in the database; mapping from ID to timespan. 

2349 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2350 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2351 # visit IDs are monotonically increasing). 

2352 ids = sorted(visits.keys()) 

2353 self.assertGreater(len(ids), 20) 

2354 # Pick some quasi-random indexes into `ids` to play with. 

2355 i1 = int(len(ids) * 0.1) 

2356 i2 = int(len(ids) * 0.3) 

2357 i3 = int(len(ids) * 0.6) 

2358 i4 = int(len(ids) * 0.8) 

2359 # Extract some times from those: just before the beginning of i1 (which 

2360 # should be after the end of the previous visit), exactly the 

2361 # beginning of i2, just after the beginning of i3 (and before its end), 

2362 # and the exact end of i4. 

2363 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2364 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2365 t2 = visits[ids[i2]].begin 

2366 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2367 self.assertLess(t3, visits[ids[i3]].end) 

2368 t4 = visits[ids[i4]].end 

2369 # Make sure those are actually in order. 

2370 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2371 

2372 bind = { 

2373 "t1": t1, 

2374 "t2": t2, 

2375 "t3": t3, 

2376 "t4": t4, 

2377 "ts23": Timespan(t2, t3), 

2378 } 

2379 

2380 def query(where): 

2381 """Helper function that queries for visit data IDs and returns 

2382 results as a sorted, deduplicated list of visit IDs. 

2383 """ 

2384 return sorted( 

2385 { 

2386 dataId["visit"] 

2387 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2388 } 

2389 ) 

2390 

2391 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2392 # where they appear in the expression, and how we get the timespan into 

2393 # the expression. 

2394 

2395 # t1 is before the start of i1, so this should not include i1. 

2396 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2397 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2398 # should not include i2. 

2399 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2400 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2401 # t3 is in the middle of i3, so this should include i3. 

2402 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2403 # This one should not include i3, by the same reasoning. 

2404 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2405 # t4 is exactly at the end of i4, so this should include i4. 

2406 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2407 # i4's upper bound of t4 is exclusive, so this should not include i4. 

2408 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2409 

2410 # Now some timespan vs. time scalar queries. 

2411 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2412 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2413 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2414 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2415 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2416 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2417 

2418 # Empty timespans should not overlap anything. 

2419 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 

2420 

2421 def testCollectionSummaries(self): 

2422 """Test recording and retrieval of collection summaries.""" 

2423 self.maxDiff = None 

2424 registry = self.makeRegistry() 

2425 # Importing datasets from yaml should go through the code path where 

2426 # we update collection summaries as we insert datasets. 

2427 self.loadData(registry, "base.yaml") 

2428 self.loadData(registry, "datasets.yaml") 

2429 flat = registry.getDatasetType("flat") 

2430 expected1 = CollectionSummary() 

2431 expected1.dataset_types.add(registry.getDatasetType("bias")) 

2432 expected1.add_data_ids( 

2433 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)] 

2434 ) 

2435 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2436 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2437 # Create a chained collection with both of the imported runs; the 

2438 # summary should be the same: it is a union of two identical summaries. 

2439 chain = "chain" 

2440 registry.registerCollection(chain, CollectionType.CHAINED) 

2441 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2442 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2443 # Associate flats only into a tagged collection and a calibration 

2444 # collection to check summaries of those. 

2445 tag = "tag" 

2446 registry.registerCollection(tag, CollectionType.TAGGED) 

2447 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2448 calibs = "calibs" 

2449 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2450 registry.certify( 

2451 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2452 ) 

2453 expected2 = expected1.copy() 

2454 expected2.dataset_types.discard("bias") 

2455 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2456 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2457 # Explicitly calling Registry.refresh() should load those same 

2458 # summaries, via a totally different code path. 

2459 registry.refresh() 

2460 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2461 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2462 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2463 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2464 

2465 def testBindInQueryDatasets(self): 

2466 """Test that the bind parameter is correctly forwarded in 

2467 queryDatasets recursion. 

2468 """ 

2469 registry = self.makeRegistry() 

2470 # Load base data and example datasets so that queryDatasets has 

2471 # something to return. 

2472 self.loadData(registry, "base.yaml") 

2473 self.loadData(registry, "datasets.yaml") 

2474 self.assertEqual( 

2475 set(registry.queryDatasets("flat", band="r", collections=...)), 

2476 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2477 ) 
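
# A minimal sketch of what `bind` does in the expression above: free
# identifiers in the `where` string that are not dimension names are
# resolved from the bind mapping, so the bound form is equivalent to
# writing the literal inline:
#
#     registry.queryDatasets("flat", where="band='r'", collections=...)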

2478 

2479 def testQueryResultSummaries(self): 

2480 """Test summary methods like `count`, `any`, and `explain_no_results` 

2481 on `DataCoordinateQueryResults` and `DatasetQueryResults` 

2482 """ 

2483 registry = self.makeRegistry() 

2484 self.loadData(registry, "base.yaml") 

2485 self.loadData(registry, "datasets.yaml") 

2486 self.loadData(registry, "spatial.yaml") 

2487 # Default test dataset has two collections, each with both flats and 

2488 # biases. Add a new collection with only biases. 

2489 registry.registerCollection("biases", CollectionType.TAGGED) 

2490 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2491 # First query yields two results, and involves no postprocessing. 

2492 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2493 self.assertTrue(query1.any(execute=False, exact=False)) 

2494 self.assertTrue(query1.any(execute=True, exact=False)) 

2495 self.assertTrue(query1.any(execute=True, exact=True)) 

2496 self.assertEqual(query1.count(exact=False), 2) 

2497 self.assertEqual(query1.count(exact=True), 2) 

2498 self.assertFalse(list(query1.explain_no_results())) 
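
# A note on the flags exercised throughout this test: execute=False allows
# the answer to be derived from the query structure alone (and may be
# optimistically True), while exact=True forces execution including any
# postprocess filtering; compare query2 and query4 below.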

2499 # Second query should yield no results, but this isn't detectable 

2500 # unless we actually run a query. 

2501 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2502 self.assertTrue(query2.any(execute=False, exact=False)) 

2503 self.assertFalse(query2.any(execute=True, exact=False)) 

2504 self.assertFalse(query2.any(execute=True, exact=True)) 

2505 self.assertEqual(query2.count(exact=False), 0) 

2506 self.assertEqual(query2.count(exact=True), 0) 

2507 self.assertFalse(list(query2.explain_no_results())) 

2508 # These queries yield no results due to various problems that can be 

2509 # spotted prior to execution, and they produce helpful diagnostics. 

2510 base_query = registry.queryDataIds(["detector", "physical_filter"]) 

2511 queries_and_snippets = [ 

2512 ( 

2513 # Dataset type name doesn't match any existing dataset types. 

2514 registry.queryDatasets("nonexistent", collections=...), 

2515 ["nonexistent"], 

2516 ), 

2517 ( 

2518 # Dataset type object isn't registered. 

2519 registry.queryDatasets( 

2520 DatasetType( 

2521 "nonexistent", 

2522 dimensions=["instrument"], 

2523 universe=registry.dimensions, 

2524 storageClass="Image", 

2525 ), 

2526 collections=..., 

2527 ), 

2528 ["nonexistent"], 

2529 ), 

2530 ( 

2531 # No datasets of this type in this collection. 

2532 registry.queryDatasets("flat", collections=["biases"]), 

2533 ["flat", "biases"], 

2534 ), 

2535 ( 

2536 # No datasets of this type in this collection. 

2537 base_query.findDatasets("flat", collections=["biases"]), 

2538 ["flat", "biases"], 

2539 ), 

2540 ( 

2541 # No collections matching at all. 

2542 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2543 ["potato"], 

2544 ), 

2545 ] 

2546 # The behavior of these additional queries is slated to change in the 

2547 # future, so we also check for deprecation warnings. 

2548 with self.assertWarns(FutureWarning): 

2549 queries_and_snippets.append( 

2550 ( 

2551 # Dataset type name doesn't match any existing dataset 

2552 # types. 

2553 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...), 

2554 ["nonexistent"], 

2555 ) 

2556 ) 

2557 with self.assertWarns(FutureWarning): 

2558 queries_and_snippets.append( 

2559 ( 

2560 # Dataset type name doesn't match any existing dataset 

2561 # types. 

2562 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...), 

2563 ["nonexistent"], 

2564 ) 

2565 ) 

2566 for query, snippets in queries_and_snippets: 

2567 self.assertFalse(query.any(execute=False, exact=False)) 

2568 self.assertFalse(query.any(execute=True, exact=False)) 

2569 self.assertFalse(query.any(execute=True, exact=True)) 

2570 self.assertEqual(query.count(exact=False), 0) 

2571 self.assertEqual(query.count(exact=True), 0) 

2572 messages = list(query.explain_no_results()) 

2573 self.assertTrue(messages) 

2574 # Want all expected snippets to appear in at least one message. 

2575 self.assertTrue( 

2576 any( 

2577 all(snippet in message for snippet in snippets) for message in messages 

2578 ), 

2579 messages, 

2580 ) 

2581 

2582 # This query does yield results, but it should also emit a warning 

2583 # because passing dataset type patterns to queryDataIds is deprecated; 

2584 # we just check for the warning. 

2585 with self.assertWarns(FutureWarning): 

2586 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...) 

2587 

2588 # These queries yield no results due to problems that can be identified 

2589 # by cheap follow-up queries, and they produce helpful diagnostics. 

2590 for query, snippets in [ 

2591 ( 

2592 # No records for one of the involved dimensions. 

2593 registry.queryDataIds(["subfilter"]), 

2594 ["dimension records", "subfilter"], 

2595 ), 

2596 ( 

2597 # No records for one of the involved dimensions. 

2598 registry.queryDimensionRecords("subfilter"), 

2599 ["dimension records", "subfilter"], 

2600 ), 

2601 ]: 

2602 self.assertFalse(query.any(execute=True, exact=False)) 

2603 self.assertFalse(query.any(execute=True, exact=True)) 

2604 self.assertEqual(query.count(exact=True), 0) 

2605 messages = list(query.explain_no_results()) 

2606 self.assertTrue(messages) 

2607 # Want all expected snippets to appear in at least one message. 

2608 self.assertTrue( 

2609 any( 

2610 all(snippet in message for snippet in snippets) for message in messages 

2611 ), 

2612 messages, 

2613 ) 

2614 

2615 # This query yields four overlaps in the database, but one is filtered 

2616 # out in postprocessing. The count queries aren't accurate because 

2617 # they don't account for duplication that happens due to an internal 

2618 # join against commonSkyPix. 

2619 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2620 self.assertEqual( 

2621 { 

2622 DataCoordinate.standardize( 

2623 instrument="Cam1", 

2624 skymap="SkyMap1", 

2625 visit=v, 

2626 tract=t, 

2627 universe=registry.dimensions, 

2628 ) 

2629 for v, t in [(1, 0), (2, 0), (2, 1)] 

2630 }, 

2631 set(query3), 

2632 ) 

2633 self.assertTrue(query3.any(execute=False, exact=False)) 

2634 self.assertTrue(query3.any(execute=True, exact=False)) 

2635 self.assertTrue(query3.any(execute=True, exact=True)) 

2636 self.assertGreaterEqual(query3.count(exact=False), 4) 

2637 self.assertGreaterEqual(query3.count(exact=True), 3) 

2638 self.assertFalse(list(query3.explain_no_results())) 

2639 # This query yields overlaps in the database, but all are filtered 

2640 # out in postprocessing. The count queries again aren't very useful. 

2641 # We have to use `where=` here to avoid an optimization that 

2642 # (currently) skips the spatial postprocess-filtering because it 

2643 # recognizes that no spatial join is necessary. That's not ideal, but 

2644 # fixing it is out of scope for this ticket. 

2645 query4 = registry.queryDataIds( 

2646 ["visit", "tract"], 

2647 instrument="Cam1", 

2648 skymap="SkyMap1", 

2649 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2650 ) 

2651 self.assertFalse(set(query4)) 

2652 self.assertTrue(query4.any(execute=False, exact=False)) 

2653 self.assertTrue(query4.any(execute=True, exact=False)) 

2654 self.assertFalse(query4.any(execute=True, exact=True)) 

2655 self.assertGreaterEqual(query4.count(exact=False), 1) 

2656 self.assertEqual(query4.count(exact=True), 0) 

2657 messages = list(query4.explain_no_results()) 

2658 self.assertTrue(messages) 

2659 self.assertTrue(any("regions did not overlap" in message for message in messages)) 

2660 

2661 # There are also cases where a query yields no results but we do not 

2662 # yet know how to explain why. 

2663 query5 = registry.queryDimensionRecords( 

2664 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1" 

2665 ) 

2666 self.assertEqual(query5.count(exact=True), 0) 

2667 messages = list(query5.explain_no_results()) 

2668 self.assertFalse(messages) 

2669 # This query should yield results from one dataset type but not the 

2670 # other, which is not registered. 

2671 query6 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"]) 

2672 self.assertTrue(set(query6)) 

2673 self.assertTrue(query6.any(execute=False, exact=False)) 

2674 self.assertTrue(query6.any(execute=True, exact=False)) 

2675 self.assertTrue(query6.any(execute=True, exact=True)) 

2676 self.assertGreaterEqual(query6.count(exact=False), 1) 

2677 self.assertGreaterEqual(query6.count(exact=True), 1) 

2678 self.assertFalse(list(query6.explain_no_results())) 

2679 

2680 def testQueryDataIdsOrderBy(self): 

2681 """Test order_by and limit on result returned by queryDataIds().""" 

2682 registry = self.makeRegistry() 

2683 self.loadData(registry, "base.yaml") 

2684 self.loadData(registry, "datasets.yaml") 

2685 self.loadData(registry, "spatial.yaml") 

2686 

2687 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

2688 return registry.queryDataIds( 

2689 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

2690 ) 

2691 

2692 Test = namedtuple( 

2693 "testQueryDataIdsOrderByTest", 

2694 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

2695 defaults=(None, None, None), 

2696 ) 

2697 

2698 test_data = ( 

2699 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2700 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

2701 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

2702 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

2703 Test( 

2704 "tract.id,visit.id", 

2705 "tract,visit", 

2706 ((0, 1), (0, 1), (0, 2)), 

2707 limit=(3,), 

2708 ), 

2709 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

2710 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

2711 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

2712 Test( 

2713 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

2714 ), 

2715 Test( 

2716 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

2717 ), 

2718 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2719 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2720 Test( 

2721 "tract,-timespan.begin,timespan.end", 

2722 "tract,visit", 

2723 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

2724 ), 

2725 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

2726 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

2727 Test( 

2728 "tract,detector", 

2729 "tract,detector", 

2730 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2731 datasets="flat", 

2732 collections="imported_r", 

2733 ), 

2734 Test( 

2735 "tract,detector.full_name", 

2736 "tract,detector", 

2737 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2738 datasets="flat", 

2739 collections="imported_r", 

2740 ), 

2741 Test( 

2742 "tract,detector.raft,detector.name_in_raft", 

2743 "tract,detector", 

2744 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2745 datasets="flat", 

2746 collections="imported_r", 

2747 ), 

2748 ) 

2749 

2750 for test in test_data: 

2751 order_by = test.order_by.split(",") 

2752 keys = test.keys.split(",") 

2753 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

2754 if test.limit is not None: 

2755 query = query.limit(*test.limit) 

2756 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

2757 self.assertEqual(dataIds, test.result) 

2758 

2759 # and materialize 

2760 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

2761 if test.limit is not None: 

2762 query = query.limit(*test.limit) 

2763 with query.materialize() as materialized: 

2764 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in materialized) 

2765 self.assertEqual(dataIds, test.result) 

2766 

2767 # errors in a name 

2768 for order_by in ("", "-"): 

2769 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2770 list(do_query().order_by(order_by)) 

2771 

2772 for order_by in ("undimension.name", "-undimension.name"): 

2773 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"): 

2774 list(do_query().order_by(order_by)) 

2775 

2776 for order_by in ("attract", "-attract"): 

2777 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

2778 list(do_query().order_by(order_by)) 

2779 

2780 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

2781 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

2782 

2783 with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimesion"): 

2784 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

2785 

2786 with self.assertRaisesRegex( 

2787 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

2788 ): 

2789 list(do_query(("tract",)).order_by("timespan.begin")) 

2790 

2791 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

2792 list(do_query(("tract",)).order_by("tract.timespan.begin")) 

2793 

2794 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

2795 list(do_query(("tract",)).order_by("tract.name")) 

2796 

2797 def testQueryDataIdsGovernorExceptions(self): 

2798 """Test exceptions raised by queryDataIds() for incorrect governors.""" 

2799 registry = self.makeRegistry() 

2800 self.loadData(registry, "base.yaml") 

2801 self.loadData(registry, "datasets.yaml") 

2802 self.loadData(registry, "spatial.yaml") 

2803 

2804 def do_query(dimensions, dataId=None, where=None, bind=None, **kwargs): 

2805 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs) 

2806 

2807 Test = namedtuple( 

2808 "testQueryDataIdExceptionsTest", 

2809 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"), 

2810 defaults=(None, None, None, {}, None, 0), 

2811 ) 

2812 

2813 test_data = ( 

2814 Test("tract,visit", count=6), 

2815 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2816 Test( 

2817 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError 

2818 ), 

2819 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6), 

2820 Test( 

2821 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError 

2822 ), 

2823 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6), 

2824 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError), 

2825 Test( 

2826 "tract,visit", 

2827 where="instrument=cam AND skymap=map", 

2828 bind={"cam": "Cam1", "map": "SkyMap1"}, 

2829 count=6, 

2830 ), 

2831 Test( 

2832 "tract,visit", 

2833 where="instrument=cam AND skymap=map", 

2834 bind={"cam": "Cam", "map": "SkyMap"}, 

2835 exception=DataIdValueError, 

2836 ), 

2837 ) 

2838 

2839 for test in test_data: 

2840 dimensions = test.dimensions.split(",") 

2841 if test.exception: 

2842 with self.assertRaises(test.exception): 

2843 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count() 

2844 else: 

2845 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2846 self.assertEqual(query.count(), test.count) 

2847 

2848 # and materialize 

2849 if test.exception: 

2850 with self.assertRaises(test.exception): 

2851 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2852 with query.materialize() as materialized: 

2853 materialized.count() 

2854 else: 

2855 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs) 

2856 with query.materialize() as materialized: 

2857 self.assertEqual(materialized.count(), test.count) 

2858 

2859 def testQueryDimensionRecordsOrderBy(self): 

2860 """Test order_by and limit on result returned by 

2861 queryDimensionRecords(). 

2862 """ 

2863 registry = self.makeRegistry() 

2864 self.loadData(registry, "base.yaml") 

2865 self.loadData(registry, "datasets.yaml") 

2866 self.loadData(registry, "spatial.yaml") 

2867 

2868 def do_query(element, datasets=None, collections=None): 

2869 return registry.queryDimensionRecords( 

2870 element, instrument="Cam1", datasets=datasets, collections=collections 

2871 ) 

2872 

2873 query = do_query("detector") 

2874 self.assertEqual(len(list(query)), 4) 

2875 

2876 Test = namedtuple( 

2877 "testQueryDataIdsOrderByTest", 

2878 ("element", "order_by", "result", "limit", "datasets", "collections"), 

2879 defaults=(None, None, None), 

2880 ) 

2881 

2882 test_data = ( 

2883 Test("detector", "detector", (1, 2, 3, 4)), 

2884 Test("detector", "-detector", (4, 3, 2, 1)), 

2885 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)), 

2886 Test("detector", "-detector.purpose", (4,), limit=(1,)), 

2887 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)), 

2888 Test("visit", "visit", (1, 2)), 

2889 Test("visit", "-visit.id", (2, 1)), 

2890 Test("visit", "zenith_angle", (1, 2)), 

2891 Test("visit", "-visit.name", (2, 1)), 

2892 Test("visit", "day_obs,-timespan.begin", (2, 1)), 

2893 ) 

2894 

2895 for test in test_data: 

2896 order_by = test.order_by.split(",") 

2897 query = do_query(test.element).order_by(*order_by) 

2898 if test.limit is not None: 

2899 query = query.limit(*test.limit) 

2900 dataIds = tuple(rec.id for rec in query) 

2901 self.assertEqual(dataIds, test.result) 

2902 

2903 # errors in a name 

2904 for order_by in ("", "-"): 

2905 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2906 list(do_query("detector").order_by(order_by)) 

2907 

2908 for order_by in ("undimension.name", "-undimension.name"): 

2909 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"): 

2910 list(do_query("detector").order_by(order_by)) 

2911 

2912 for order_by in ("attract", "-attract"): 

2913 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."): 

2914 list(do_query("detector").order_by(order_by)) 

2915 

2916 def testQueryDimensionRecordsExceptions(self): 

2917 """Test exceptions raised by queryDimensionRecords().""" 

2918 registry = self.makeRegistry() 

2919 self.loadData(registry, "base.yaml") 

2920 self.loadData(registry, "datasets.yaml") 

2921 self.loadData(registry, "spatial.yaml") 

2922 

2923 result = registry.queryDimensionRecords("detector") 

2924 self.assertEqual(result.count(), 4) 

2925 result = registry.queryDimensionRecords("detector", instrument="Cam1") 

2926 self.assertEqual(result.count(), 4) 

2927 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"}) 

2928 self.assertEqual(result.count(), 4) 

2929 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'") 

2930 self.assertEqual(result.count(), 4) 

2931 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"}) 

2932 self.assertEqual(result.count(), 4) 

2933 

2934 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

2935 result = registry.queryDimensionRecords("detector", instrument="NotCam1") 

2936 result.count() 

2937 

2938 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"): 

2939 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"}) 

2940 result.count() 

2941 

2942 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

2943 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'") 

2944 result.count() 

2945 

2946 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"): 

2947 result = registry.queryDimensionRecords( 

2948 "detector", where="instrument=instr", bind={"instr": "NotCam1"} 

2949 ) 

2950 result.count() 

2951 

2952 def testDatasetConstrainedDimensionRecordQueries(self): 

2953 """Test that queryDimensionRecords works even when given a dataset 

2954 constraint whose dimensions extend beyond the requested dimension 

2955 element's. 

2956 """ 

2957 registry = self.makeRegistry() 

2958 self.loadData(registry, "base.yaml") 

2959 self.loadData(registry, "datasets.yaml") 

2960 # Query for physical_filter dimension records, using a dataset that 

2961 # has both physical_filter and detector dimensions. 

2962 records = registry.queryDimensionRecords( 

2963 "physical_filter", 

2964 datasets=["flat"], 

2965 collections="imported_r", 

2966 ) 

2967 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"}) 

2968 # Trying to constrain by all dataset types is an error. 

2969 with self.assertRaises(TypeError): 

2970 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r")) 

2971 

2972 def testSkyPixDatasetQueries(self): 

2973 """Test that we can build queries involving skypix dimensions as long 

2974 as a dataset type that uses those dimensions is included. 

2975 """ 

2976 registry = self.makeRegistry() 

2977 self.loadData(registry, "base.yaml") 

2978 dataset_type = DatasetType( 

2979 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int" 

2980 ) 

2981 registry.registerDatasetType(dataset_type) 

2982 run = "r" 

2983 registry.registerRun(run) 

2984 # First try queries where there are no datasets; the concern is whether 

2985 # we can even build and execute these queries without raising, even 

2986 # when "doomed" query shortcuts are in play. 

2987 self.assertFalse( 

2988 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)) 

2989 ) 

2990 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run))) 

2991 # Now add a dataset and see that we can get it back. 

2992 htm7 = registry.dimensions.skypix["htm"][7].pixelization 
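# `Pixelization.universe()` returns the `lsst.sphgeom.RangeSet` of all
# valid pixel indices; its elements behave as (begin, end) pairs, so the
# `[0][0]` below selects the first valid htm7 index.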

2993 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0]) 

2994 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run) 

2995 self.assertEqual( 

2996 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)), 

2997 {data_id}, 

2998 ) 

2999 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref}) 

3000 

3001 def testDatasetIdFactory(self): 

3002 """Simple test for DatasetIdFactory, mostly to catch potential changes 

3003 in its API. 

3004 """ 

3005 registry = self.makeRegistry() 

3006 factory = registry.datasetIdFactory 

3007 dataset_type = DatasetType( 

3008 "datasetType", 

3009 dimensions=["detector", "instrument"], 

3010 universe=registry.dimensions, 

3011 storageClass="int", 

3012 ) 

3013 run = "run" 

3014 data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions) 

3015 

3016 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE) 

3017 self.assertIsInstance(datasetId, uuid.UUID) 

3018 self.assertEqual(datasetId.version, 4) 

3019 

3020 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE) 

3021 self.assertIsInstance(datasetId, uuid.UUID) 

3022 self.assertEqual(datasetId.version, 5) 

3023 

3024 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN) 

3025 self.assertIsInstance(datasetId, uuid.UUID) 

3026 self.assertEqual(datasetId.version, 5)
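
# A sketch of the semantics implied by the version checks above: UNIQUE
# produces random version-4 UUIDs (different on every call), while the
# name-based version-5 modes are assumed deterministic in their inputs:
#
#     id_a = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
#     id_b = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
#     assert id_a == id_b  # assumed: name-based ids are reproducible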