Coverage for python/lsst/daf/butler/registry/tests/_registry.py: 5%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    Timespan,
    ddl,
)
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ConflictingDefinitionError,
    InconsistentDataIdError,
    MissingCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum
from ..summaries import CollectionSummary
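
# The tests below are written against the abstract `RegistryTests` interface;
# a concrete backend supplies `getDataDir` and `makeRegistry`.  A minimal
# subclass might look like the following sketch (`Registry.createFromConfig`
# is the usual factory here, and `TESTDIR` is a hypothetical path constant):
#
#     class SQLiteRegistryTests(RegistryTests, unittest.TestCase):
#         @classmethod
#         def getDataDir(cls) -> str:
#             return os.path.join(TESTDIR, "data", "registry")
#
#         def makeRegistry(self) -> Registry:
#             return Registry.createFromConfig(self.makeRegistryConfig())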

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create the RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested."""
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
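        # As the assertions below demonstrate, each keyword argument to
        # fetchOpaqueData is an equality constraint on the matching column
        # (a sequence value becomes an IN clause), and multiple keyword
        # arguments are combined with AND.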
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters.  SQLite documents the limit as 32k, but in
        # practice it appears to be much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, and the second has matching elements in different
        # batches (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.obs.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
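        # Note: insertDimensionData is a plain insert (duplicates raise),
        # while syncDimensionData, exercised further below, is an idempotent
        # insert-or-verify.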
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
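        # insertDatasets returns one resolved DatasetRef per data ID, with
        # the registry-assigned dataset ID filled in (hence the check below
        # that ref.id is not None).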
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing same DatasetRef with different dataset ID is an error
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test non-unique IDs; they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):

                # Use integer dataset ID to force UUID calculation in _import
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                (ref1,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
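                # DATAID_TYPE and DATAID_TYPE_RUN produce deterministic,
                # name-based (version 5) UUIDs, which is what makes the
                # repeated imports below idempotent.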

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)

    def testImportDatasetsInt(self):
        """Test for `Registry._importDatasets` with integer dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManager"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}
        dataset_id = 999999999

        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run=run)
        (ref1,) = registry._importDatasets([ref])
        # Should make new integer ID.
        self.assertNotEqual(ref1.id, ref.id)

        # Ingesting same dataId with different dataset ID is an error
        ref2 = ref1.unresolved().resolved(dataset_id, run=run)
        with self.assertRaises(ConflictingDefinitionError):
            registry._importDatasets([ref2])

        # Ingesting different dataId with the same dataset ID should work
        ref3 = DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

        ref3 = DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names,
        )
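        # assertLess on sets is a proper-subset check: the component query
        # returns these names plus whatever other components the parent
        # storage classes define.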
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
        )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
        )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")},
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)

    def testComponentLookups(self):
        """Test searching for component datasets via their parents."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
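        # A component ref shares its parent's data ID; only the dataset type
        # differs, so the lookups below can use the parent's dataId directly.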
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(
            registry.queryDatasets(
                "bias.wcs",
                collections=collection,
            )
        )
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2}, {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
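        # A CHAINED collection is searched front to back, so in the lookups
        # below tag1 is always consulted before run2.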
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # Search for bias with dataId1; this should find it via tag1 in
        # chain2, recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])
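        # With flatten=True the chain stored for "outer" records "innermost"
        # directly, rather than following "inner" at query time.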

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
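        # The savepoint confined the rollback to the inner block: dataId1
        # (outer) survives, while dataId2 (inner) does not.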
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
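        # Layout so far: visit 10 = exposures {100, 101}, visit 11 =
        # {110, 111}, visit 20 = {200, 201}; detectors 1-5 are defined, but
        # datasets below are only inserted for a 3-detector subset.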
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions),
            )
            self.assertEqual(
                packer2.unpack(packer2.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer2.dimensions),
            )
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(TypeError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(TypeError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, this is not in the dimensions, but it
        # is a part of the full expression so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test, we want
        # "band" in the test so also have to add physical_filter
        # dimensions
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8")))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # The expression excludes everything; specifying a non-existing
        # skymap is not a fatal error, it's an operator error.
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
        ).toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins."""
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)
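        # At the time of writing, the two spatial families in the default
        # dimension universe are the skymap one (tract, patch) and the
        # observation one (visit, visit_detector_region).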

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph)
                    for (dataId1, region1), (dataId2, region2) in itertools.product(
                        regions[element1.name].items(), regions[element2.name].items()
                    )
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, regionsForElement in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in regionsForElement.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph)
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

1198 def testAbstractQuery(self): 

1199 """Test that we can run a query that just lists the known 

1200 bands. This is tricky because band is 

1201 backed by a query against physical_filter. 

1202 """ 

1203 registry = self.makeRegistry() 

1204 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1205 registry.insertDimensionData( 

1206 "physical_filter", 

1207 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1208 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1209 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1210 ) 
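
# Two physical filters above share band "i", so the query must 

# deduplicate to return each band only once. 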

1211 rows = registry.queryDataIds(["band"]).toSet() 

1212 self.assertCountEqual( 

1213 rows, 

1214 [ 

1215 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1216 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1217 ], 

1218 ) 

1219 

1220 def testAttributeManager(self): 

1221 """Test basic functionality of attribute manager.""" 

1222 # Number of attribute records in a fresh database: 6 managers with 

1223 # 3 records per manager, plus the dimensions configuration. 

1224 VERSION_COUNT = 6 * 3 + 1 

1225 

1226 registry = self.makeRegistry() 

1227 attributes = registry._managers.attributes 
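
# The attributes manager is a simple persistent string key/value store; 

# the tests below exercise get/set/delete/items. 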

1228 

1229 # check what get() returns for non-existing key 

1230 self.assertIsNone(attributes.get("attr")) 

1231 self.assertEqual(attributes.get("attr", ""), "") 

1232 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1233 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1234 

1235 # cannot store empty key or value 

1236 with self.assertRaises(ValueError): 

1237 attributes.set("", "value") 

1238 with self.assertRaises(ValueError): 

1239 attributes.set("attr", "") 

1240 

1241 # set value of non-existing key 

1242 attributes.set("attr", "value") 

1243 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1244 self.assertEqual(attributes.get("attr"), "value") 

1245 

1246 # update value of existing key 

1247 with self.assertRaises(ButlerAttributeExistsError): 

1248 attributes.set("attr", "value2") 

1249 

1250 attributes.set("attr", "value2", force=True) 

1251 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1252 self.assertEqual(attributes.get("attr"), "value2") 

1253 

1254 # delete existing key 

1255 self.assertTrue(attributes.delete("attr")) 

1256 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1257 

1258 # delete non-existing key 

1259 self.assertFalse(attributes.delete("non-attr")) 

1260 

1261 # store a bunch of keys and get them all back 

1262 data = [ 

1263 ("version.core", "1.2.3"), 

1264 ("version.dimensions", "3.2.1"), 

1265 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1266 ] 

1267 for key, value in data: 

1268 attributes.set(key, value) 

1269 items = dict(attributes.items()) 

1270 for key, value in data: 

1271 self.assertEqual(items[key], value) 

1272 

1273 def testQueryDatasetsDeduplication(self): 

1274 """Test that the findFirst option to queryDatasets selects datasets 

1275 from collections in the order given. 

1276 """ 

1277 registry = self.makeRegistry() 

1278 self.loadData(registry, "base.yaml") 

1279 self.loadData(registry, "datasets.yaml") 

1280 self.assertCountEqual( 

1281 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1282 [ 

1283 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1284 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1285 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1286 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1287 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1288 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1289 ], 

1290 ) 

1291 self.assertCountEqual( 

1292 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1293 [ 

1294 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1295 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1296 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1297 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1298 ], 

1299 ) 

1300 self.assertCountEqual( 

1301 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1302 [ 

1303 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1304 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1305 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1306 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1307 ], 

1308 ) 

1309 

1310 def testQueryResults(self): 

1311 """Test querying for data IDs and then manipulating the QueryResults 

1312 object returned to perform other queries. 

1313 """ 

1314 registry = self.makeRegistry() 

1315 self.loadData(registry, "base.yaml") 

1316 self.loadData(registry, "datasets.yaml") 

1317 bias = registry.getDatasetType("bias") 

1318 flat = registry.getDatasetType("flat") 

1319 # Obtain expected results from methods other than those we're testing 

1320 # here. That includes: 

1321 # - the dimensions of the data IDs we want to query: 

1322 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1323 # - the dimensions of some other data IDs we'll extract from that: 

1324 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1325 # - the data IDs we expect to obtain from the first queries: 

1326 expectedDataIds = DataCoordinateSet( 

1327 { 

1328 DataCoordinate.standardize( 

1329 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1330 ) 

1331 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1332 }, 

1333 graph=expectedGraph, 

1334 hasFull=False, 

1335 hasRecords=False, 

1336 ) 

1337 # - the flat datasets we expect to find from those data IDs, in just 

1338 # one collection (so deduplication is irrelevant): 

1339 expectedFlats = [ 

1340 registry.findDataset( 

1341 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1342 ), 

1343 registry.findDataset( 

1344 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1345 ), 

1346 registry.findDataset( 

1347 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1348 ), 

1349 ] 

1350 # - the data IDs we expect to extract from that: 

1351 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1352 # - the bias datasets we expect to find from those data IDs, after we 

1353 # subset-out the physical_filter dimension, both with duplicates: 

1354 expectedAllBiases = [ 

1355 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1356 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1357 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1358 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1359 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1360 ] 

1361 # - ...and without duplicates: 

1362 expectedDeduplicatedBiases = [ 

1363 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1364 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1365 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1366 ] 

1367 # Test against those expected results, using a "lazy" query for the 

1368 # data IDs (which re-executes that query each time we use it to do 

1369 # something new). 

1370 dataIds = registry.queryDataIds( 

1371 ["detector", "physical_filter"], 

1372 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1373 instrument="Cam1", 

1374 ) 

1375 self.assertEqual(dataIds.graph, expectedGraph) 

1376 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1377 self.assertCountEqual( 

1378 list( 

1379 dataIds.findDatasets( 

1380 flat, 

1381 collections=["imported_r"], 

1382 ) 

1383 ), 

1384 expectedFlats, 

1385 ) 

1386 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1387 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1388 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1389 self.assertCountEqual( 

1390 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1391 expectedAllBiases, 

1392 ) 

1393 self.assertCountEqual( 

1394 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1395 expectedDeduplicatedBiases, 

1396 ) 

1397 # Materialize the bias dataset queries (only) by putting the results 

1398 # into temporary tables, then repeat those tests. 
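
# materialize() is used as a context manager so that the temporary 

# table is dropped again on exit. 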

1399 with subsetDataIds.findDatasets( 

1400 bias, collections=["imported_r", "imported_g"], findFirst=False 

1401 ).materialize() as biases: 

1402 self.assertCountEqual(list(biases), expectedAllBiases) 

1403 with subsetDataIds.findDatasets( 

1404 bias, collections=["imported_r", "imported_g"], findFirst=True 

1405 ).materialize() as biases: 

1406 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1407 # Materialize the data ID subset query, but not the dataset queries. 

1408 with subsetDataIds.materialize() as subsetDataIds: 

1409 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1410 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1411 self.assertCountEqual( 

1412 list( 

1413 subsetDataIds.findDatasets( 

1414 bias, collections=["imported_r", "imported_g"], findFirst=False 

1415 ) 

1416 ), 

1417 expectedAllBiases, 

1418 ) 

1419 self.assertCountEqual( 

1420 list( 

1421 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1422 ), 

1423 expectedDeduplicatedBiases, 

1424 ) 

1425 # Materialize the dataset queries, too. 

1426 with subsetDataIds.findDatasets( 

1427 bias, collections=["imported_r", "imported_g"], findFirst=False 

1428 ).materialize() as biases: 

1429 self.assertCountEqual(list(biases), expectedAllBiases) 

1430 with subsetDataIds.findDatasets( 

1431 bias, collections=["imported_r", "imported_g"], findFirst=True 

1432 ).materialize() as biases: 

1433 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1434 # Materialize the original query, but none of the follow-up queries. 

1435 with dataIds.materialize() as dataIds: 

1436 self.assertEqual(dataIds.graph, expectedGraph) 

1437 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1438 self.assertCountEqual( 

1439 list( 

1440 dataIds.findDatasets( 

1441 flat, 

1442 collections=["imported_r"], 

1443 ) 

1444 ), 

1445 expectedFlats, 

1446 ) 

1447 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1448 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1449 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1450 self.assertCountEqual( 

1451 list( 

1452 subsetDataIds.findDatasets( 

1453 bias, collections=["imported_r", "imported_g"], findFirst=False 

1454 ) 

1455 ), 

1456 expectedAllBiases, 

1457 ) 

1458 self.assertCountEqual( 

1459 list( 

1460 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1461 ), 

1462 expectedDeduplicatedBiases, 

1463 ) 

1464 # Materialize just the bias dataset queries. 

1465 with subsetDataIds.findDatasets( 

1466 bias, collections=["imported_r", "imported_g"], findFirst=False 

1467 ).materialize() as biases: 

1468 self.assertCountEqual(list(biases), expectedAllBiases) 

1469 with subsetDataIds.findDatasets( 

1470 bias, collections=["imported_r", "imported_g"], findFirst=True 

1471 ).materialize() as biases: 

1472 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1473 # Materialize the subset data ID query, but not the dataset 

1474 # queries. 

1475 with subsetDataIds.materialize() as subsetDataIds: 

1476 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1477 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1478 self.assertCountEqual( 

1479 list( 

1480 subsetDataIds.findDatasets( 

1481 bias, collections=["imported_r", "imported_g"], findFirst=False 

1482 ) 

1483 ), 

1484 expectedAllBiases, 

1485 ) 

1486 self.assertCountEqual( 

1487 list( 

1488 subsetDataIds.findDatasets( 

1489 bias, collections=["imported_r", "imported_g"], findFirst=True 

1490 ) 

1491 ), 

1492 expectedDeduplicatedBiases, 

1493 ) 

1494 # Materialize the bias dataset queries, too, so now we're 

1495 # materializing every single step. 

1496 with subsetDataIds.findDatasets( 

1497 bias, collections=["imported_r", "imported_g"], findFirst=False 

1498 ).materialize() as biases: 

1499 self.assertCountEqual(list(biases), expectedAllBiases) 

1500 with subsetDataIds.findDatasets( 

1501 bias, collections=["imported_r", "imported_g"], findFirst=True 

1502 ).materialize() as biases: 

1503 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1504 

1505 def testEmptyDimensionsQueries(self): 

1506 """Test Query and QueryResults objects in the case where there are no 

1507 dimensions. 

1508 """ 

1509 # Set up test data: one dataset type, two runs, one dataset in each. 

1510 registry = self.makeRegistry() 

1511 self.loadData(registry, "base.yaml") 

1512 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1513 registry.registerDatasetType(schema) 

1514 dataId = DataCoordinate.makeEmpty(registry.dimensions) 
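
# makeEmpty returns the unique data ID that has no dimensions; both 

# datasets below share it. 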

1515 run1 = "run1" 

1516 run2 = "run2" 

1517 registry.registerRun(run1) 

1518 registry.registerRun(run2) 

1519 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1520 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1521 # Query directly for both datasets together, then for each one at a time. 

1522 self.checkQueryResults( 

1523 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1524 ) 

1525 self.checkQueryResults( 

1526 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1527 [dataset1], 

1528 ) 

1529 self.checkQueryResults( 

1530 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1531 [dataset2], 

1532 ) 

1533 # Query for data IDs with no dimensions. 

1534 dataIds = registry.queryDataIds([]) 

1535 self.checkQueryResults(dataIds, [dataId]) 

1536 # Use queried data IDs to find the datasets. 

1537 self.checkQueryResults( 

1538 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1539 [dataset1, dataset2], 

1540 ) 

1541 self.checkQueryResults( 

1542 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1543 [dataset1], 

1544 ) 

1545 self.checkQueryResults( 

1546 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1547 [dataset2], 

1548 ) 

1549 # Now materialize the data ID query results and repeat those tests. 

1550 with dataIds.materialize() as dataIds: 

1551 self.checkQueryResults(dataIds, [dataId]) 

1552 self.checkQueryResults( 

1553 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1554 [dataset1], 

1555 ) 

1556 self.checkQueryResults( 

1557 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1558 [dataset2], 

1559 ) 

1560 # Query for non-empty data IDs, then subset that to get the empty one. 

1561 # Repeat the above tests starting from that. 

1562 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1563 self.checkQueryResults(dataIds, [dataId]) 

1564 self.checkQueryResults( 

1565 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1566 [dataset1, dataset2], 

1567 ) 

1568 self.checkQueryResults( 

1569 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1570 [dataset1], 

1571 ) 

1572 self.checkQueryResults( 

1573 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1574 [dataset2], 

1575 ) 

1576 with dataIds.materialize() as dataIds: 

1577 self.checkQueryResults(dataIds, [dataId]) 

1578 self.checkQueryResults( 

1579 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1580 [dataset1, dataset2], 

1581 ) 

1582 self.checkQueryResults( 

1583 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1584 [dataset1], 

1585 ) 

1586 self.checkQueryResults( 

1587 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1588 [dataset2], 

1589 ) 

1590 # Query for non-empty data IDs, then materialize, then subset to get 

1591 # the empty one. Repeat again. 

1592 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1593 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1594 self.checkQueryResults(dataIds, [dataId]) 

1595 self.checkQueryResults( 

1596 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1597 [dataset1, dataset2], 

1598 ) 

1599 self.checkQueryResults( 

1600 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1601 [dataset1], 

1602 ) 

1603 self.checkQueryResults( 

1604 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1605 [dataset2], 

1606 ) 

1607 with dataIds.materialize() as dataIds: 

1608 self.checkQueryResults(dataIds, [dataId]) 

1609 self.checkQueryResults( 

1610 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1611 [dataset1, dataset2], 

1612 ) 

1613 self.checkQueryResults( 

1614 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1615 [dataset1], 

1616 ) 

1617 self.checkQueryResults( 

1618 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1619 [dataset2], 

1620 ) 

1621 

1622 def testDimensionDataModifications(self): 

1623 """Test that modifying dimension records via: 

1624 syncDimensionData(..., update=True) and 

1625 insertDimensionData(..., replace=True) works as expected, even in the 

1626 presence of datasets using those dimensions and spatial overlap 

1627 relationships. 

1628 """ 

1629 

1630 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1631 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1632 for begin, end in ranges: 

1633 yield from range(begin, end) 

1634 

1635 def range_set_hull( 

1636 ranges: lsst.sphgeom.RangeSet, 

1637 pixelization: lsst.sphgeom.HtmPixelization, 

1638 ) -> lsst.sphgeom.ConvexPolygon: 

1639 """Create a ConvexPolygon hull of the region defined by a set of 

1640 HTM pixelization index ranges. 

1641 """ 

1642 points = [] 

1643 for index in unpack_range_set(ranges): 

1644 points.extend(pixelization.triangle(index).getVertices()) 

1645 return lsst.sphgeom.ConvexPolygon(points) 

1646 

1647 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1648 # and four child regions (the trixels within the parent at the next 

1649 # level). We'll use the parent as a tract/visit region and the children 

1650 # as its patch/visit_detector regions. 

1651 registry = self.makeRegistry() 

1652 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1653 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1654 index = 12288 
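
# In HTM, the four children of trixel i at the next level are trixels 

# 4*i through 4*i + 3, so scaling this single-index RangeSet by 4 yields 

# exactly the child index range [4*12288, 4*12289) == [49152, 49156). 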

1655 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 

1656 assert htm6.universe().contains(child_ranges_small) 

1657 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)] 

1658 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1659 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1660 ) 

1661 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1662 # Make a larger version of each child region, defined to be the set of 

1663 # htm6 trixels that overlap the original's bounding circle. Make a new 

1664 # parent that's the convex hull of the new children. 

1665 child_regions_large = [ 

1666 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1667 ] 

1668 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small)) 

1669 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1670 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1671 ) 

1672 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1673 assert parent_region_large.contains(parent_region_small) 

1674 assert not parent_region_small.contains(parent_region_large) 

1675 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1676 # Find some commonSkyPix indices that overlap the large regions but not 

1677 # overlap the small regions. We use commonSkyPix here to make sure the 

1678 # real tests later involve what's in the database, not just post-query 

1679 # region filtering. 

1680 child_difference_indices = [] 

1681 for large, small in zip(child_regions_large, child_regions_small): 

1682 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1683 assert difference, "if this is empty, we can't test anything useful with these regions" 

1684 assert all( 

1685 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1686 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1687 for d in difference 

1688 ) 

1689 child_difference_indices.append(difference) 

1690 parent_difference_indices = list( 

1691 unpack_range_set( 

1692 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1693 ) 

1694 ) 

1695 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1696 assert all( 

1697 ( 

1698 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1699 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1700 ) 

1701 for d in parent_difference_indices 

1702 ) 

1703 # Now that we've finally got those regions, we'll insert the large ones 

1704 # as tract/patch dimension records. 

1705 skymap_name = "testing_v1" 

1706 registry.insertDimensionData( 

1707 "skymap", 

1708 { 

1709 "name": skymap_name, 

1710 "hash": bytes([42]), 

1711 "tract_max": 1, 

1712 "patch_nx_max": 2, 

1713 "patch_ny_max": 2, 

1714 }, 

1715 ) 

1716 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

1717 registry.insertDimensionData( 

1718 "patch", 

1719 *[ 

1720 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1721 for n, c in enumerate(child_regions_large) 

1722 ], 

1723 ) 

1724 # Add a dataset that uses these dimensions to make sure that modifying 

1725 # them doesn't disrupt foreign keys (need to make sure DB doesn't 

1726 # implement insert with replace=True as delete-then-insert). 

1727 dataset_type = DatasetType( 

1728 "coadd", 

1729 dimensions=["tract", "patch"], 

1730 universe=registry.dimensions, 

1731 storageClass="Exposure", 

1732 ) 

1733 registry.registerDatasetType(dataset_type) 

1734 registry.registerCollection("the_run", CollectionType.RUN) 

1735 registry.insertDatasets( 

1736 dataset_type, 

1737 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

1738 run="the_run", 

1739 ) 

1740 # Query for tracts and patches that overlap some "difference" 

1741 # commonSkyPix pixels; there should be overlaps, because the database has 

1742 # the "large" suite of regions. 

1743 self.assertEqual( 

1744 {0}, 

1745 { 

1746 data_id["tract"] 

1747 for data_id in registry.queryDataIds( 

1748 ["tract"], 

1749 skymap=skymap_name, 

1750 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1751 ) 

1752 }, 

1753 ) 

1754 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1755 self.assertIn( 

1756 patch_id, 

1757 { 

1758 data_id["patch"] 

1759 for data_id in registry.queryDataIds( 

1760 ["patch"], 

1761 skymap=skymap_name, 

1762 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1763 ) 

1764 }, 

1765 ) 

1766 # Use sync to update the tract region and insert to update the patch 

1767 # regions, to the "small" suite. 

1768 updated = registry.syncDimensionData( 

1769 "tract", 

1770 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

1771 update=True, 

1772 ) 

1773 self.assertEqual(updated, {"region": parent_region_large}) 

1774 registry.insertDimensionData( 

1775 "patch", 

1776 *[ 

1777 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1778 for n, c in enumerate(child_regions_small) 

1779 ], 

1780 replace=True, 

1781 ) 

1782 # Query again; there now should be no such overlaps, because the 

1783 # database has the "small" suite of regions. 

1784 self.assertFalse( 

1785 set( 

1786 registry.queryDataIds( 

1787 ["tract"], 

1788 skymap=skymap_name, 

1789 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1790 ) 

1791 ) 

1792 ) 

1793 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1794 self.assertNotIn( 

1795 patch_id, 

1796 { 

1797 data_id["patch"] 

1798 for data_id in registry.queryDataIds( 

1799 ["patch"], 

1800 skymap=skymap_name, 

1801 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1802 ) 

1803 }, 

1804 ) 

1805 # Update back to the large regions and query one more time. 

1806 updated = registry.syncDimensionData( 

1807 "tract", 

1808 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

1809 update=True, 

1810 ) 

1811 self.assertEqual(updated, {"region": parent_region_small}) 

1812 registry.insertDimensionData( 

1813 "patch", 

1814 *[ 

1815 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1816 for n, c in enumerate(child_regions_large) 

1817 ], 

1818 replace=True, 

1819 ) 

1820 self.assertEqual( 

1821 {0}, 

1822 { 

1823 data_id["tract"] 

1824 for data_id in registry.queryDataIds( 

1825 ["tract"], 

1826 skymap=skymap_name, 

1827 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1828 ) 

1829 }, 

1830 ) 

1831 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1832 self.assertIn( 

1833 patch_id, 

1834 { 

1835 data_id["patch"] 

1836 for data_id in registry.queryDataIds( 

1837 ["patch"], 

1838 skymap=skymap_name, 

1839 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1840 ) 

1841 }, 

1842 ) 

1843 

1844 def testCalibrationCollections(self): 

1845 """Test operations on `~CollectionType.CALIBRATION` collections, 

1846 including `Registry.certify`, `Registry.decertify`, and 

1847 `Registry.findDataset`. 

1848 """ 

1849 # Setup - make a Registry, fill it with some datasets in 

1850 # non-calibration collections. 

1851 registry = self.makeRegistry() 

1852 self.loadData(registry, "base.yaml") 

1853 self.loadData(registry, "datasets.yaml") 

1854 # Set up some timestamps. 

1855 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

1856 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

1857 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

1858 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

1859 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 
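
# Passing None at both ends lets itertools.combinations also produce 

# half-unbounded spans and the fully unbounded Timespan(None, None). 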

1860 allTimespans = [ 

1861 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

1862 ] 

1863 # Get references to some datasets. 

1864 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

1865 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

1866 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

1867 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

1868 # Register the main calibration collection we'll be working with. 

1869 collection = "Cam1/calibs/default" 

1870 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

1871 # Cannot associate into a calibration collection (no timespan). 

1872 with self.assertRaises(TypeError): 

1873 registry.associate(collection, [bias2a]) 

1874 # Certify 2a dataset with [t2, t4) validity. 

1875 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

1876 # We should not be able to certify 2b with anything overlapping that 

1877 # window. 

1878 with self.assertRaises(ConflictingDefinitionError): 

1879 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

1880 with self.assertRaises(ConflictingDefinitionError): 

1881 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

1882 with self.assertRaises(ConflictingDefinitionError): 

1883 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

1884 with self.assertRaises(ConflictingDefinitionError): 

1885 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

1886 with self.assertRaises(ConflictingDefinitionError): 

1887 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

1888 with self.assertRaises(ConflictingDefinitionError): 

1889 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

1890 with self.assertRaises(ConflictingDefinitionError): 

1891 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

1892 with self.assertRaises(ConflictingDefinitionError): 

1893 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

1894 # We should be able to certify 3a with a range overlapping that window, 

1895 # because it's for a different detector. 

1896 # We'll certify 3a over [t1, t3). 

1897 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

1898 # Now we'll certify 2b and 3b together over [t4, ∞). 

1899 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 
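
# Current state: bias2a over [t2, t4), bias3a over [t1, t3), and both 

# bias2b and bias3b over [t4, ∞). 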

1900 

1901 # Fetch all associations and check that they are what we expect. 

1902 self.assertCountEqual( 

1903 list( 

1904 registry.queryDatasetAssociations( 

1905 "bias", 

1906 collections=[collection, "imported_g", "imported_r"], 

1907 ) 

1908 ), 

1909 [ 

1910 DatasetAssociation( 

1911 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1912 collection="imported_g", 

1913 timespan=None, 

1914 ), 

1915 DatasetAssociation( 

1916 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1917 collection="imported_r", 

1918 timespan=None, 

1919 ), 

1920 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

1921 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

1922 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

1923 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

1924 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

1925 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

1926 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

1927 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

1928 ], 

1929 ) 

1930 

1931 class Ambiguous: 

1932 """Tag class to denote lookups that should be ambiguous.""" 

1933 

1934 pass 

1935 

1936 def assertLookup( 

1937 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]] 

1938 ) -> None: 

1939 """Local function that asserts that a bias lookup returns the given 

1940 expected result. 

1941 """ 

1942 if expected is Ambiguous: 

1943 with self.assertRaises(RuntimeError): 

1944 registry.findDataset( 

1945 "bias", 

1946 collections=collection, 

1947 instrument="Cam1", 

1948 detector=detector, 

1949 timespan=timespan, 

1950 ) 

1951 else: 

1952 self.assertEqual( 

1953 expected, 

1954 registry.findDataset( 

1955 "bias", 

1956 collections=collection, 

1957 instrument="Cam1", 

1958 detector=detector, 

1959 timespan=timespan, 

1960 ), 

1961 ) 

1962 

1963 # Systematically test lookups against expected results. 

1964 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

1965 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

1966 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

1967 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

1968 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

1969 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

1970 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

1971 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

1972 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

1973 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

1974 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

1975 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

1976 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

1977 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

1978 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

1979 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

1980 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

1981 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

1982 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

1983 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

1984 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

1985 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

1986 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

1987 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

1988 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

1989 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

1990 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

1991 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

1992 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

1993 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

1994 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

1995 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

1996 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

1997 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

1998 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

1999 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2000 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2001 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2002 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2003 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2004 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2005 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2006 

2007 # Decertify [t3, t5) for all data IDs, and do test lookups again. 

2008 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2009 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2010 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2011 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2012 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2013 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2014 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2015 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2016 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2017 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2018 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2019 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2020 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2021 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2022 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2023 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2024 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2025 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2026 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2027 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2028 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2029 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2030 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2031 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2032 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2033 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2034 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2035 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2036 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2037 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2038 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2039 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2040 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2041 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2042 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2043 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2044 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2045 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2046 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2047 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2048 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2049 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2050 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2051 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2052 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2053 

2054 # Decertify everything, this time with explicit data IDs, then check 

2055 # that no lookups succeed. 

2056 registry.decertify( 

2057 collection, 

2058 "bias", 

2059 Timespan(None, None), 

2060 dataIds=[ 

2061 dict(instrument="Cam1", detector=2), 

2062 dict(instrument="Cam1", detector=3), 

2063 ], 

2064 ) 

2065 for detector in (2, 3): 

2066 for timespan in allTimespans: 

2067 assertLookup(detector=detector, timespan=timespan, expected=None) 

2068 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2069 # those. 

2070 registry.certify( 

2071 collection, 

2072 [bias2a, bias3a], 

2073 Timespan(None, None), 

2074 ) 

2075 for timespan in allTimespans: 

2076 assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2077 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2078 # Decertify just bias2 over [t2, t4). 

2079 # This should split a single certification row into two (and leave the 

2080 # other existing row, for bias3a, alone). 

2081 registry.decertify( 

2082 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2083 ) 

2084 for timespan in allTimespans: 

2085 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2086 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2087 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2088 if overlapsBefore and overlapsAfter: 

2089 expected = Ambiguous 

2090 elif overlapsBefore or overlapsAfter: 

2091 expected = bias2a 

2092 else: 

2093 expected = None 

2094 assertLookup(detector=2, timespan=timespan, expected=expected) 

2095 

2096 def testSkipCalibs(self): 

2097 """Test how queries handle skipping of calibration collections.""" 

2098 registry = self.makeRegistry() 

2099 self.loadData(registry, "base.yaml") 

2100 self.loadData(registry, "datasets.yaml") 

2101 

2102 coll_calib = "Cam1/calibs/default" 

2103 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2104 

2105 # Add all biases to the calibration collection. 

2106 # Without this, the logic that prunes dataset subqueries based on 

2107 # datasetType-collection summary information will fire before the logic 

2108 # we want to test below. This is a good thing (it avoids the dreaded 

2109 # NotImplementedError a bit more often) everywhere but here. 

2110 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2111 

2112 coll_list = [coll_calib, "imported_g", "imported_r"] 

2113 chain = "Cam1/chain" 

2114 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2115 registry.setCollectionChain(chain, coll_list) 

2116 

2117 # an explicit collection list will raise if findFirst=True or if the 

2118 # query involves temporal dimensions 
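
# (A findFirst search in a CALIBRATION collection is not well-defined 

# without a timespan, because the same dataset type and data ID may be 

# certified there with multiple validity ranges.) 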

2119 with self.assertRaises(NotImplementedError): 

2120 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2121 with self.assertRaises(NotImplementedError): 

2122 registry.queryDataIds( 

2123 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2124 ).count() 

2125 

2126 # a chained collection will skip the calibration collection instead 

2127 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2128 self.assertGreater(len(datasets), 0) 

2129 

2130 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2131 self.assertGreater(len(dataIds), 0) 

2132 

2133 # a glob pattern will skip it too 

2134 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2135 self.assertGreater(len(datasets), 0) 

2136 

2137 # a regular expression will skip it too 

2138 pattern = re.compile(".*") 

2139 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2140 self.assertGreater(len(datasets), 0) 

2141 

2142 # ellipsis should work as usual 

2143 datasets = list(registry.queryDatasets("bias", collections=...)) 

2144 self.assertGreater(len(datasets), 0) 

2145 

2146 # a few tests with findFirst 

2147 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2148 self.assertGreater(len(datasets), 0) 

2149 

2150 def testIngestTimeQuery(self): 
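
"""Test that queries can be constrained on dataset ingest_date, using 

both literal time strings and bind parameters. 

""" 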

2151 

2152 registry = self.makeRegistry() 

2153 self.loadData(registry, "base.yaml") 

2154 dt0 = datetime.utcnow() 

2155 self.loadData(registry, "datasets.yaml") 

2156 dt1 = datetime.utcnow() 

2157 

2158 datasets = list(registry.queryDatasets(..., collections=...)) 

2159 len0 = len(datasets) 

2160 self.assertGreater(len0, 0) 

2161 
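
# T'...' is the query expression syntax for an absolute time literal. 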

2162 where = "ingest_date > T'2000-01-01'" 

2163 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2164 len1 = len(datasets) 

2165 self.assertEqual(len0, len1) 

2166 

2167 # no one will ever use this piece of software in 30 years 

2168 where = "ingest_date > T'2050-01-01'" 

2169 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2170 len2 = len(datasets) 

2171 self.assertEqual(len2, 0) 

2172 

2173 # Check more exact timing to make sure there is no 37-second offset 

2174 # (after fixing DM-30124). SQLite time precision is 1 second, so make 

2175 # sure that we don't test with higher precision. 

2176 tests = [ 

2177 # format: (timestamp, operator, expected_len) 

2178 (dt0 - timedelta(seconds=1), ">", len0), 

2179 (dt0 - timedelta(seconds=1), "<", 0), 

2180 (dt1 + timedelta(seconds=1), "<", len0), 

2181 (dt1 + timedelta(seconds=1), ">", 0), 

2182 ] 

2183 for dt, op, expect_len in tests: 

2184 dt_str = dt.isoformat(sep=" ") 

2185 

2186 where = f"ingest_date {op} T'{dt_str}'" 

2187 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2188 self.assertEqual(len(datasets), expect_len) 

2189 

2190 # same with bind using datetime or astropy Time 

2191 where = f"ingest_date {op} ingest_time" 

2192 datasets = list( 

2193 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2194 ) 

2195 self.assertEqual(len(datasets), expect_len) 

2196 

2197 dt_astropy = astropy.time.Time(dt, format="datetime") 

2198 datasets = list( 

2199 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2200 ) 

2201 self.assertEqual(len(datasets), expect_len) 

2202 

2203 def testTimespanQueries(self): 

2204 """Test query expressions involving timespans.""" 

2205 registry = self.makeRegistry() 

2206 self.loadData(registry, "hsc-rc2-subset.yaml") 

2207 # All visits in the database; mapping from ID to timespan. 

2208 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2209 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2210 # visit IDs are monotonically increasing). 

2211 ids = sorted(visits.keys()) 

2212 self.assertGreater(len(ids), 20) 

2213 # Pick some quasi-random indexes into `ids` to play with. 

2214 i1 = int(len(ids) * 0.1) 

2215 i2 = int(len(ids) * 0.3) 

2216 i3 = int(len(ids) * 0.6) 

2217 i4 = int(len(ids) * 0.8) 

2218 # Extract some times from those: just before the beginning of i1 (which 

2219 # should be after the end of the previous visit), exactly the 

2220 # beginning of i2, just after the beginning of i3 (and before its end), 

2221 # and the exact end of i4. 

2222 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2223 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2224 t2 = visits[ids[i2]].begin 

2225 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2226 self.assertLess(t3, visits[ids[i3]].end) 

2227 t4 = visits[ids[i4]].end 

2228 # Make sure those are actually in order. 

2229 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2230 

2231 bind = { 

2232 "t1": t1, 

2233 "t2": t2, 

2234 "t3": t3, 

2235 "t4": t4, 

2236 "ts23": Timespan(t2, t3), 

2237 } 

2238 

2239 def query(where): 

2240 """Helper function that queries for visit data IDs and returns 

2241 results as a sorted, deduplicated list of visit IDs. 

2242 """ 

2243 return sorted( 

2244 { 

2245 dataId["visit"] 

2246 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2247 } 

2248 ) 

2249 

2250 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2251 # where they appear in the expression, and how we get the timespan into 

2252 # the expression. 

2253 

2254 # t1 is before the start of i1, so this should not include i1. 

2255 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2256 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2257 # should not include i2. 

2258 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2259 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2260 # t3 is in the middle of i3, so this should include i3. 

2261 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2262 # This one should not include t3 by the same reasoning. 

2263 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2264 # t4 is exactly at the end of i4, so this should include i4. 

2265 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2266 # i4's upper bound of t4 is exclusive, so this should not include i4. 

2267 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 
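
# An illustrative check of the half-open [begin, end) semantics relied 

# on above: two spans that merely touch at t2 do not overlap. 

self.assertFalse(Timespan(t1, t2).overlaps(Timespan(t2, t3))) 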

2268 

2269 # Now some timespan vs. time scalar queries. 

2270 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2271 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2272 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2273 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2274 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2275 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2276 

2277 # Empty timespans should not overlap anything. 

2278 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 

2279 

2280 def testCollectionSummaries(self): 

2281 """Test recording and retrieval of collection summaries.""" 

2282 self.maxDiff = None 

2283 registry = self.makeRegistry() 

2284 # Importing datasets from yaml should go through the code path where 

2285 # we update collection summaries as we insert datasets. 

2286 self.loadData(registry, "base.yaml") 

2287 self.loadData(registry, "datasets.yaml") 

2288 flat = registry.getDatasetType("flat") 

2289 expected1 = CollectionSummary.makeEmpty(registry.dimensions) 

2290 expected1.datasetTypes.add(registry.getDatasetType("bias")) 

2291 expected1.datasetTypes.add(flat) 

2292 expected1.dimensions.update_extract( 

2293 DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions) 

2294 ) 

2295 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2296 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2297 # Create a chained collection with both of the imported runs; the 

2298 # summary should be the same, because it's a union with itself. 

2299 chain = "chain" 

2300 registry.registerCollection(chain, CollectionType.CHAINED) 

2301 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2302 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2303 # Associate flats only into a tagged collection and a calibration 

2304 # collection to check summaries of those. 

2305 tag = "tag" 

2306 registry.registerCollection(tag, CollectionType.TAGGED) 

2307 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2308 calibs = "calibs" 

2309 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2310 registry.certify( 

2311 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2312 ) 

2313 expected2 = expected1.copy() 

2314 expected2.datasetTypes.discard("bias") 

2315 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2316 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2317 # Explicitly calling Registry.refresh() should load those same 

2318 # summaries, via a totally different code path. 

2319 registry.refresh() 

2320 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2321 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2322 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2323 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2324 

2325 def testUnrelatedDimensionQueries(self): 

2326 """Test that WHERE expressions in queries can reference dimensions that 

2327 are not in the result set. 

2328 """ 

2329 registry = self.makeRegistry() 

2330 # There is no data to back this query, but it should still return 

2331 # zero records instead of raising. 

2332 self.assertFalse( 

2333 set( 

2334 registry.queryDataIds( 

2335 ["visit", "detector"], where="instrument='Cam1' AND skymap='not_here' AND tract=0" 

2336 ) 

2337 ), 

2338 ) 

2339 

2340 def testBindInQueryDatasets(self): 

2341 """Test that the bind parameter is correctly forwarded in 

2342 queryDatasets recursion. 

2343 """ 

2344 registry = self.makeRegistry() 

2345 # Load base dimensions and datasets so there is something to query 

2346 # against. 

2347 self.loadData(registry, "base.yaml") 

2348 self.loadData(registry, "datasets.yaml") 

2349 self.assertEqual( 

2350 set(registry.queryDatasets("flat", band="r", collections=...)), 

2351 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2352 ) 

2353 

2354 def testQueryResultSummaries(self): 

2355 """Test summary methods like `count`, `any`, and `explain_no_results` 

2356 on `DataCoordinateQueryResults` and `DatasetQueryResults` 

2357 """ 

2358 registry = self.makeRegistry() 

2359 self.loadData(registry, "base.yaml") 

2360 self.loadData(registry, "datasets.yaml") 

2361 self.loadData(registry, "spatial.yaml") 

2362 # Default test dataset has two collections, each with both flats and 

2363 # biases. Add a new collection with only biases. 

2364 registry.registerCollection("biases", CollectionType.TAGGED) 

2365 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2366 # First query yields two results, and involves no postprocessing. 

2367 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2368 self.assertTrue(query1.any(execute=False, exact=False)) 

2369 self.assertTrue(query1.any(execute=True, exact=False)) 

2370 self.assertTrue(query1.any(execute=True, exact=True)) 

2371 self.assertEqual(query1.count(exact=False), 2) 

2372 self.assertEqual(query1.count(exact=True), 2) 

2373 self.assertFalse(list(query1.explain_no_results())) 

2374 # Second query should yield no results, but this isn't detectable 

2375 # unless we actually run a query. 

2376 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2377 self.assertTrue(query2.any(execute=False, exact=False)) 

2378 self.assertFalse(query2.any(execute=True, exact=False)) 

2379 self.assertFalse(query2.any(execute=True, exact=True)) 

2380 self.assertEqual(query2.count(exact=False), 0) 

2381 self.assertEqual(query2.count(exact=True), 0) 

2382 self.assertFalse(list(query2.explain_no_results())) 

2383 # These queries yield no results due to various problems that can be 

2384 # spotted prior to execution, yielding helpful diagnostics. 

2385 for query, snippets in [ 

2386 ( 

2387 # Dataset type name doesn't match any existing dataset types. 

2388 registry.queryDatasets("nonexistent", collections=...), 

2389 ["nonexistent"], 

2390 ), 

2391 ( 

2392 # Dataset type name doesn't match any existing dataset types. 

2393 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...), 

2394 ["nonexistent"], 

2395 ), 

2396 ( 

2397 # Dataset type object isn't registered. 

2398 registry.queryDatasets( 

2399 DatasetType( 

2400 "nonexistent", 

2401 dimensions=["instrument"], 

2402 universe=registry.dimensions, 

2403 storageClass="Image", 

2404 ), 

2405 collections=..., 

2406 ), 

2407 ["nonexistent"], 

2408 ), 

2409 ( 

2410 # No datasets of this type in this collection. 

2411 registry.queryDatasets("flat", collections=["biases"]), 

2412 ["flat", "biases"], 

2413 ), 

2414 ( 

2415 # No collections matching at all. 

2416 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2417 ["potato"], 

2418 ), 

2419 ]: 

2420 

2421 self.assertFalse(query.any(execute=False, exact=False)) 

2422 self.assertFalse(query.any(execute=True, exact=False)) 

2423 self.assertFalse(query.any(execute=True, exact=True)) 

2424 self.assertEqual(query.count(exact=False), 0) 

2425 self.assertEqual(query.count(exact=True), 0) 

2426 messages = list(query.explain_no_results()) 

2427 self.assertTrue(messages) 

2428 # Want all expected snippets to appear in at least one message. 

2429 self.assertTrue( 

2430 any( 

2431 all(snippet in message for snippet in snippets) for message in messages

2432 ), 

2433 messages, 

2434 ) 

2435 

2436 # These queries yield no results due to problems that can be identified 

2437 # by cheap follow-up queries, yielding helpful diagnostics. 

2438 for query, snippets in [ 

2439 ( 

2440 # No records for one of the involved dimensions. 

2441 registry.queryDataIds(["subfilter"]), 

2442 ["dimension records", "subfilter"], 

2443 ), 

2444 ]: 

2445 self.assertFalse(query.any(execute=True, exact=False)) 

2446 self.assertFalse(query.any(execute=True, exact=True)) 

2447 self.assertEqual(query.count(exact=True), 0) 

2448 messages = list(query.explain_no_results()) 

2449 self.assertTrue(messages) 

2450 # Want all expected snippets to appear in at least one message. 

2451 self.assertTrue( 

2452 any( 

2453 all(snippet in message for snippet in snippets) for message in messages

2454 ), 

2455 messages, 

2456 ) 

2457 

2458 # This query yields four overlaps in the database, but one is filtered 

2459 # out in postprocessing. The count queries aren't accurate because 

2460 # they don't account for duplication that happens due to an internal 

2461 # join against commonSkyPix. 
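
# (Spatial joins are mediated by the common skypix dimension, so a

# single visit-tract overlap can appear once per shared skypix pixel

# until postprocessing deduplicates; hence only lower bounds are

# asserted on the counts.)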

2462 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2463 self.assertEqual( 

2464 { 

2465 DataCoordinate.standardize( 

2466 instrument="Cam1", 

2467 skymap="SkyMap1", 

2468 visit=v, 

2469 tract=t, 

2470 universe=registry.dimensions, 

2471 ) 

2472 for v, t in [(1, 0), (2, 0), (2, 1)] 

2473 }, 

2474 set(query3), 

2475 ) 

2476 self.assertTrue(query3.any(execute=False, exact=False)) 

2477 self.assertTrue(query3.any(execute=True, exact=False)) 

2478 self.assertTrue(query3.any(execute=True, exact=True)) 

2479 self.assertGreaterEqual(query3.count(exact=False), 4) 

2480 self.assertGreaterEqual(query3.count(exact=True), 3) 

2481 self.assertFalse(list(query3.explain_no_results())) 

2482 # This query yields overlaps in the database, but all are filtered 

2483 # out in postprocessing. The count queries again aren't very useful. 

2484 # We have to use `where=` here to avoid an optimization that 

2485 # (currently) skips the spatial postprocess-filtering because it 

2486 # recognizes that no spatial join is necessary. That's not ideal, but 

2487 # fixing it is out of scope for this ticket. 

2488 query4 = registry.queryDataIds( 

2489 ["visit", "tract"], 

2490 instrument="Cam1", 

2491 skymap="SkyMap1", 

2492 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2493 ) 

2494 self.assertFalse(set(query4)) 

2495 self.assertTrue(query4.any(execute=False, exact=False)) 

2496 self.assertTrue(query4.any(execute=True, exact=False)) 

2497 self.assertFalse(query4.any(execute=True, exact=True)) 

2498 self.assertGreaterEqual(query4.count(exact=False), 1) 

2499 self.assertEqual(query4.count(exact=True), 0) 

2500 messages = list(query4.explain_no_results()) 

2501 self.assertTrue(messages) 

2502 self.assertTrue(any("regions did not overlap" in message for message in messages)) 

2503 

2504 def testQueryDataIdsOrderBy(self): 

2505 """Test order_by and limit on result returned by queryDataIds().""" 

2506 registry = self.makeRegistry() 

2507 self.loadData(registry, "base.yaml") 

2508 self.loadData(registry, "datasets.yaml") 

2509 self.loadData(registry, "spatial.yaml") 

2510 

2511 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

2512 return registry.queryDataIds( 

2513 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

2514 ) 

2515 


2519 Test = namedtuple( 

2520 "testQueryDataIdsOrderByTest", 

2521 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

2522 defaults=(None, None, None), 

2523 ) 

2524 

2525 # For each test up to six items are defined here: 

2526 # - order_by column names, comma-separated 

2527 # - DataId keys to extract, comma-separated 

2528 # - tuple of the resulting key values we expect 

2529 # - optional limit tuple, dataset type name, and collections 
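
# ORDER BY syntax exercised below: a leading "-" means descending;

# "element.field" (e.g. "visit.exposure_time") sorts on a dimension

# record field; bare metadata names are resolved against the requested

# dimensions; "timespan.begin"/"timespan.end" sort on temporal bounds.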

2530 test_data = ( 

2531 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2532 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

2533 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

2534 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

2535 Test( 

2536 "tract.id,visit.id", 

2537 "tract,visit", 

2538 ((0, 1), (0, 1), (0, 2)), 

2539 limit=(3,), 

2540 ), 

2541 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

2542 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

2543 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

2544 Test( 

2545 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

2546 ), 

2547 Test( 

2548 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

2549 ), 

2550 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2551 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2552 Test( 

2553 "tract,-timespan.begin,timespan.end", 

2554 "tract,visit", 

2555 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

2556 ), 

2557 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

2558 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

2559 Test( 

2560 "tract,detector", 

2561 "tract,detector", 

2562 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2563 datasets="flat", 

2564 collections="imported_r", 

2565 ), 

2566 Test( 

2567 "tract,detector.full_name", 

2568 "tract,detector", 

2569 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2570 datasets="flat", 

2571 collections="imported_r", 

2572 ), 

2573 Test( 

2574 "tract,detector.raft,detector.name_in_raft", 

2575 "tract,detector", 

2576 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2577 datasets="flat", 

2578 collections="imported_r", 

2579 ), 

2580 ) 

2581 

2582 for test in test_data: 

2583 order_by = test.order_by.split(",") 

2584 keys = test.keys.split(",") 

2585 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

2586 if test.limit is not None: 

2587 query = query.limit(*test.limit) 

2588 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

2589 self.assertEqual(dataIds, test.result) 

2590 

2591 # Repeat the query (without the dataset constraint) via materialize() to check that order_by and limit also apply to materialized results. 

2592 query = do_query(keys).order_by(*order_by) 

2593 if test.limit is not None: 

2594 query = query.limit(*test.limit) 

2595 with query.materialize() as materialized: 

2596 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in materialized) 

2597 self.assertEqual(dataIds, test.result) 

2598 

2599 # Error handling: malformed or unknown ORDER BY names should raise ValueError. 

2600 for order_by in ("", "-"): 

2601 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2602 list(do_query().order_by(order_by)) 

2603 

2604 for order_by in ("undimension.name", "-undimension.name"): 

2605 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"): 

2606 list(do_query().order_by(order_by)) 

2607 

2608 for order_by in ("attract", "-attract"): 

2609 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

2610 list(do_query().order_by(order_by)) 

2611 

2612 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

2613 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

2614 

2615 with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimension"): 

2616 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

2617 

2618 with self.assertRaisesRegex( 

2619 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

2620 ): 

2621 list(do_query(("tract")).order_by("timespan.begin")) 

2622 

2623 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

2624 list(do_query(("tract")).order_by("tract.timespan.begin")) 

2625 

2626 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

2627 list(do_query(("tract")).order_by("tract.name")) 

2628 

2629 def testQueryDimensionRecordsOrderBy(self): 

2630 """Test order_by and limit on result returned by 

2631 queryDimensionRecords(). 

2632 """ 

2633 registry = self.makeRegistry() 

2634 self.loadData(registry, "base.yaml") 

2635 self.loadData(registry, "datasets.yaml") 

2636 self.loadData(registry, "spatial.yaml") 

2637 

2638 def do_query(): 

2639 return registry.queryDimensionRecords("detector", instrument="Cam1") 

2640 

2641 query = do_query() 

2642 self.assertEqual(len(list(query)), 4) 

2643 

2644 # For each test three items are defined here: 

2645 # - order_by column names, comma-separated 

2646 # - limit tuple or None 

2647 # - tuple of the expected detector IDs 

2648 test_data = ( 

2649 ("detector", None, (1, 2, 3, 4)), 

2650 ("-detector", None, (4, 3, 2, 1)), 

2651 ("raft,-name_in_raft", None, (2, 1, 4, 3)), 

2652 ("-detector.purpose", (1,), (4,)), 

2653 ("-purpose,detector.raft,name_in_raft", (2, 2), (2, 3)), 

2654 ) 
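
# Because these are full `detector` records, bare field names such as

# "raft" and "purpose" resolve directly against the queried element;

# the explicit "detector." prefix is accepted but optional.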

2655 

2656 for order_by, limit, expected in test_data: 

2657 order_by = order_by.split(",") 

2658 query = do_query().order_by(*order_by) 

2659 if limit is not None: 

2660 query = query.limit(*limit) 

2661 ids = tuple(rec.id for rec in query) 

2662 self.assertEqual(ids, expected) 

2663 

2664 # Error handling: malformed or unknown ORDER BY names should raise ValueError. 

2665 for order_by in ("", "-"): 

2666 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2667 list(do_query().order_by(order_by)) 

2668 

2669 for order_by in ("undimension.name", "-undimension.name"): 

2670 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"): 

2671 list(do_query().order_by(order_by)) 

2672 

2673 for order_by in ("attract", "-attract"): 

2674 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

2675 list(do_query().order_by(order_by)) 

2676 

2677 def testDatasetConstrainedDimensionRecordQueries(self): 

2678 """Test that queryDimensionRecords works even when given a dataset 

2679 constraint whose dimensions extend beyond the requested dimension 

2680 element's. 

2681 """ 

2682 registry = self.makeRegistry() 

2683 self.loadData(registry, "base.yaml") 

2684 self.loadData(registry, "datasets.yaml") 

2685 # Query for physical_filter dimension records, using a dataset that 

2686 # has detector as well as physical_filter among its dimensions. 

2687 records = registry.queryDimensionRecords( 

2688 "physical_filter", 

2689 datasets=["flat"], 

2690 collections="imported_r", 

2691 ) 
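
# Only the physical_filters actually attached to flats in imported_r

# (the two r-band filters) should come back, deduplicated.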

2692 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})