Coverage for python/lsst/daf/butler/registry/tests/_registry.py: 5%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    Timespan,
    ddl,
)
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ConflictingDefinitionError,
    InconsistentDataIdError,
    MissingCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum
from ..summaries import CollectionSummary

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

91 @abstractmethod 

92 def getDataDir(cls) -> str: 

93 """Return the root directory containing test data YAML files.""" 

94 raise NotImplementedError() 

95 

96 def makeRegistryConfig(self) -> RegistryConfig: 

97 """Create RegistryConfig used to create a registry. 

98 

99 This method should be called by a subclass from `makeRegistry`. 

100 Returned instance will be pre-configured based on the values of class 

101 members, and default-configured for all other parameters. Subclasses 

102 that need default configuration should just instantiate 

103 `RegistryConfig` directly. 

104 """ 

105 config = RegistryConfig() 

106 if self.collectionsManager: 

107 config["managers", "collections"] = self.collectionsManager 

108 if self.datasetsManager: 

109 config["managers", "datasets"] = self.datasetsManager 

110 return config 

111 

112 @abstractmethod 

113 def makeRegistry(self) -> Registry: 

114 """Return the Registry instance to be tested.""" 

115 raise NotImplementedError() 

116 
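
    # A minimal concrete subclass (a sketch only, not part of this suite)
    # would mix RegistryTests into unittest.TestCase and implement the two
    # abstract methods.  Assuming a ``Registry.createFromConfig`` factory and
    # a ``TESTDIR`` constant pointing at the test directory, an in-memory
    # SQLite variant could look like:
    #
    #     class SqliteMemoryRegistryTests(RegistryTests, unittest.TestCase):
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(TESTDIR, "data", "registry")
    #
    #         def makeRegistry(self) -> Registry:
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"
    #             return Registry.createFromConfig(config)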

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
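        # Typical call, as used throughout this suite (a usage sketch):
        #
        #     self.checkQueryResults(
        #         registry.queryDataIds(["detector"], datasets=["bias"], collections=run),
        #         expected,
        #     )
        #
        # Iteration, count(), and any() are all checked because each
        # exercises a different evaluation path of the lazy results object.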

        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test very long IN clause which exceeds sqlite limit on number of
        # parameters.  SQLite says the limit is 32k but it looks like it is
        # much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than 1k batch size, first with
        # duplicates, second has matching elements in different batches (after
        # sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.obs.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the given
        keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing same DatasetRef with different dataset ID is an error
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])
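
        # (A note on the two ID-generation modes exercised below, inferred
        # from the assertions that follow: both produce deterministic
        # version-5 UUIDs, but DATAID_TYPE appears to hash only the dataset
        # type and data ID, so the same data ID cannot be re-imported into a
        # different run, while DATAID_TYPE_RUN also folds the run name in,
        # so it can.)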

        # Test for non-unique IDs; they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):

                # Use integer dataset ID to force UUID calculation in _import
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                (ref1,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)

    def testImportDatasetsInt(self):
        """Test for `Registry._importDatasets` with integer dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManager"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}
        dataset_id = 999999999

        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run=run)
        (ref1,) = registry._importDatasets([ref])
        # Should make new integer ID.
        self.assertNotEqual(ref1.id, ref.id)

        # Ingesting same dataId with different dataset ID is an error
        ref2 = ref1.unresolved().resolved(dataset_id, run=run)
        with self.assertRaises(ConflictingDefinitionError):
            registry._importDatasets([ref2])

        # Ingesting different dataId with the same dataset ID should work
        ref3 = DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

        ref3 = DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be included
        # when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names,
        )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
        )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
        )
        # Add a dataset type using a StorageClass that we'll then remove; check
        # that this does not affect our ability to query for dataset types
        # (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")},
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next line
        # fails (i.e. "temporary.data" _is_ in everything.names), it means
        # this part of the test isn't doing anything, because the _unregister
        # call above isn't simulating the real-life case we want it to
        # simulate, in which different versions of daf_butler in entirely
        # different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)

    def testComponentLookups(self):
        """Test searching for component datasets via their parents."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(
            registry.queryDatasets(
                "bias.wcs",
                collections=collection,
            )
        )
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2}, {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # Search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections;
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions),
            )
            self.assertEqual(
                packer2.unpack(packer2.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer2.dimensions),
            )
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(TypeError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(TypeError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, this is not in the dimensions, but it
        # is a part of the full expression so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test, we want
        # "band" in the test so also have to add physical_filter
        # dimensions
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8")))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # expression excludes everything; specifying a non-existing skymap is
        # not a fatal error, it's an operator error
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
        ).toSet()
        self.assertEqual(len(rows), 0)
1146 def testSpatialJoin(self): 

1147 """Test queries that involve spatial overlap joins.""" 

1148 registry = self.makeRegistry() 

1149 self.loadData(registry, "hsc-rc2-subset.yaml") 

1150 

1151 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of 

1152 # the TopologicalFamily they belong to. We'll relate all elements in 

1153 # each family to all of the elements in each other family. 

1154 families = defaultdict(set) 

1155 # Dictionary of {element.name: {dataId: region}}. 

1156 regions = {} 

1157 for element in registry.dimensions.getDatabaseElements(): 

1158 if element.spatial is not None: 

1159 families[element.spatial.name].add(element) 

1160 regions[element.name] = { 

1161 record.dataId: record.region for record in registry.queryDimensionRecords(element) 

1162 } 

1163 

1164 # If this check fails, it's not necessarily a problem - it may just be 

1165 # a reasonable change to the default dimension definitions - but the 

1166 # test below depends on there being more than one family to do anything 

1167 # useful. 

1168 self.assertEqual(len(families), 2) 

1169 

1170 # Overlap DatabaseDimensionElements with each other. 

1171 for family1, family2 in itertools.combinations(families, 2): 

1172 for element1, element2 in itertools.product(families[family1], families[family2]): 

1173 graph = DimensionGraph.union(element1.graph, element2.graph) 

1174 # Construct expected set of overlapping data IDs via a 

1175 # brute-force comparison of the regions we've already fetched. 

1176 expected = { 

1177 DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph) 

1178 for (dataId1, region1), (dataId2, region2) in itertools.product( 

1179 regions[element1.name].items(), regions[element2.name].items() 

1180 ) 

1181 if not region1.isDisjointFrom(region2) 

1182 } 

1183 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1184 queried = set(registry.queryDataIds(graph)) 

1185 self.assertEqual(expected, queried) 

1186 

1187 # Overlap each DatabaseDimensionElement with the commonSkyPix system. 

1188 commonSkyPix = registry.dimensions.commonSkyPix 

1189 for elementName, regions in regions.items(): 

1190 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph) 

1191 expected = set() 

1192 for dataId, region in regions.items(): 

1193 for begin, end in commonSkyPix.pixelization.envelope(region): 

1194 expected.update( 

1195 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph) 

1196 for index in range(begin, end) 

1197 ) 

1198 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1199 queried = set(registry.queryDataIds(graph)) 

1200 self.assertEqual(expected, queried) 

1201 
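The brute-force expectations above rest on two sphgeom behaviors: two regions overlap unless isDisjointFrom reports otherwise, and Pixelization.envelope returns a conservative superset of the pixels that overlap a region. A minimal standalone sketch of that pattern (the level-7 pixelization and the index below are illustrative choices, not values from the test):

import lsst.sphgeom

pix = lsst.sphgeom.HtmPixelization(7)  # commonSkyPix is htm7 in the default config
index = 8 * 4**7  # the first valid level-7 trixel index
region = pix.triangle(index).getBoundingCircle()
# envelope() yields half-open (begin, end) index ranges guaranteed to cover
# every overlapping trixel, possibly with some non-overlapping extras.
candidates = [i for begin, end in pix.envelope(region) for i in range(begin, end)]
overlapping = [i for i in candidates if not pix.triangle(i).isDisjointFrom(region)]
assert index in overlapping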

1202 def testAbstractQuery(self): 

1203 """Test that we can run a query that just lists the known 

1204 bands. This is tricky because band is 

1205 backed by a query against physical_filter. 

1206 """ 

1207 registry = self.makeRegistry() 

1208 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1209 registry.insertDimensionData( 

1210 "physical_filter", 

1211 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1212 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1213 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1214 ) 

1215 rows = registry.queryDataIds(["band"]).toSet() 

1216 self.assertCountEqual( 

1217 rows, 

1218 [ 

1219 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1220 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1221 ], 

1222 ) 

1223 

1224 def testAttributeManager(self): 

1225 """Test basic functionality of attribute manager.""" 

1226 # Number of attribute records with schema versions in a fresh database:

1227 # 6 managers with 3 records per manager, plus the dimensions config.

1228 VERSION_COUNT = 6 * 3 + 1 

1229 

1230 registry = self.makeRegistry() 

1231 attributes = registry._managers.attributes 

1232 

1233 # check what get() returns for a non-existent key

1234 self.assertIsNone(attributes.get("attr")) 

1235 self.assertEqual(attributes.get("attr", ""), "") 

1236 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1237 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1238 

1239 # cannot store empty key or value 

1240 with self.assertRaises(ValueError): 

1241 attributes.set("", "value") 

1242 with self.assertRaises(ValueError): 

1243 attributes.set("attr", "") 

1244 

1245 # set value of a non-existent key

1246 attributes.set("attr", "value") 

1247 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1248 self.assertEqual(attributes.get("attr"), "value") 

1249 

1250 # update value of existing key 

1251 with self.assertRaises(ButlerAttributeExistsError): 

1252 attributes.set("attr", "value2") 

1253 

1254 attributes.set("attr", "value2", force=True) 

1255 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1256 self.assertEqual(attributes.get("attr"), "value2") 

1257 

1258 # delete existing key 

1259 self.assertTrue(attributes.delete("attr")) 

1260 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1261 

1263 # delete a non-existent key

1263 self.assertFalse(attributes.delete("non-attr")) 

1264 

1265 # store a bunch of keys and get the list back

1266 data = [ 

1267 ("version.core", "1.2.3"), 

1268 ("version.dimensions", "3.2.1"), 

1269 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1270 ] 

1271 for key, value in data: 

1272 attributes.set(key, value) 

1273 items = dict(attributes.items()) 

1274 for key, value in data: 

1275 self.assertEqual(items[key], value) 

1276 
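The contract exercised above amounts to a string key/value store that refuses silent overwrites. A toy in-memory model (hypothetical, to summarize the semantics; the real manager is database-backed and raises ButlerAttributeExistsError on conflicts):

class DictAttributes:
    """Toy stand-in mirroring the set/get/delete contract tested above."""

    def __init__(self):
        self._data = {}

    def set(self, name, value, *, force=False):
        if not name or not value:
            raise ValueError("name and value must be non-empty")
        if name in self._data and not force:
            # The real manager raises ButlerAttributeExistsError here.
            raise RuntimeError(f"attribute {name!r} already exists")
        self._data[name] = value

    def get(self, name, default=None):
        return self._data.get(name, default)

    def delete(self, name):
        return self._data.pop(name, None) is not None

    def items(self):
        return iter(self._data.items())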

1277 def testQueryDatasetsDeduplication(self): 

1278 """Test that the findFirst option to queryDatasets selects datasets 

1279 from collections in the order given.

1280 """ 

1281 registry = self.makeRegistry() 

1282 self.loadData(registry, "base.yaml") 

1283 self.loadData(registry, "datasets.yaml") 

1284 self.assertCountEqual( 

1285 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1286 [ 

1287 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1288 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1289 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1290 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1291 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1292 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1293 ], 

1294 ) 

1295 self.assertCountEqual( 

1296 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1297 [ 

1298 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1299 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1300 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1301 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1302 ], 

1303 ) 

1304 self.assertCountEqual( 

1305 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1306 [ 

1307 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1308 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1309 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1310 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1311 ], 

1312 ) 

1313 
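What findFirst=True checks above can be stated as plain search-path logic: for each data ID, take the dataset from the first collection, in the order given, that has one. A sketch over plain dicts (not the SQL the registry actually emits):

def find_first(data_ids, collections, contents):
    """Resolve each data ID against an ordered collection search path.

    ``contents`` maps collection name -> {data_id: ref}; the first
    collection holding a given data ID wins.
    """
    results = {}
    for data_id in data_ids:
        for collection in collections:
            ref = contents.get(collection, {}).get(data_id)
            if ref is not None:
                results[data_id] = ref
                break
    return results

With detector 2 present in both runs, searching ["imported_g", "imported_r"] returns the imported_g dataset while the reversed order returns the imported_r one, which is exactly the asymmetry the three assertions above verify.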

1314 def testQueryResults(self): 

1315 """Test querying for data IDs and then manipulating the QueryResults 

1316 object returned to perform other queries. 

1317 """ 

1318 registry = self.makeRegistry() 

1319 self.loadData(registry, "base.yaml") 

1320 self.loadData(registry, "datasets.yaml") 

1321 bias = registry.getDatasetType("bias") 

1322 flat = registry.getDatasetType("flat") 

1323 # Obtain expected results from methods other than those we're testing 

1324 # here. That includes: 

1325 # - the dimensions of the data IDs we want to query: 

1326 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1327 # - the dimensions of some other data IDs we'll extract from that: 

1328 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1329 # - the data IDs we expect to obtain from the first queries: 

1330 expectedDataIds = DataCoordinateSet( 

1331 { 

1332 DataCoordinate.standardize( 

1333 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1334 ) 

1335 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1336 }, 

1337 graph=expectedGraph, 

1338 hasFull=False, 

1339 hasRecords=False, 

1340 ) 

1341 # - the flat datasets we expect to find from those data IDs, in just 

1342 # one collection (so deduplication is irrelevant): 

1343 expectedFlats = [ 

1344 registry.findDataset( 

1345 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1346 ), 

1347 registry.findDataset( 

1348 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1349 ), 

1350 registry.findDataset( 

1351 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1352 ), 

1353 ] 

1354 # - the data IDs we expect to extract from that: 

1355 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1356 # - the bias datasets we expect to find from those data IDs, after we 

1357 # subset-out the physical_filter dimension, both with duplicates: 

1358 expectedAllBiases = [ 

1359 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1360 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1361 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1362 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1363 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1364 ] 

1365 # - ...and without duplicates: 

1366 expectedDeduplicatedBiases = [ 

1367 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1368 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1369 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1370 ] 

1371 # Test against those expected results, using a "lazy" query for the 

1372 # data IDs (which re-executes that query each time we use it to do 

1373 # something new). 

1374 dataIds = registry.queryDataIds( 

1375 ["detector", "physical_filter"], 

1376 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1377 instrument="Cam1", 

1378 ) 

1379 self.assertEqual(dataIds.graph, expectedGraph) 

1380 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1381 self.assertCountEqual( 

1382 list( 

1383 dataIds.findDatasets( 

1384 flat, 

1385 collections=["imported_r"], 

1386 ) 

1387 ), 

1388 expectedFlats, 

1389 ) 

1390 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1391 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1392 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1393 self.assertCountEqual( 

1394 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1395 expectedAllBiases, 

1396 ) 

1397 self.assertCountEqual( 

1398 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1399 expectedDeduplicatedBiases, 

1400 ) 

1401 # Materialize the bias dataset queries (only) by putting the results 

1402 # into temporary tables, then repeat those tests. 

1403 with subsetDataIds.findDatasets( 

1404 bias, collections=["imported_r", "imported_g"], findFirst=False 

1405 ).materialize() as biases: 

1406 self.assertCountEqual(list(biases), expectedAllBiases) 

1407 with subsetDataIds.findDatasets( 

1408 bias, collections=["imported_r", "imported_g"], findFirst=True 

1409 ).materialize() as biases: 

1410 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1411 # Materialize the data ID subset query, but not the dataset queries. 

1412 with subsetDataIds.materialize() as subsetDataIds: 

1413 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1414 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1415 self.assertCountEqual( 

1416 list( 

1417 subsetDataIds.findDatasets( 

1418 bias, collections=["imported_r", "imported_g"], findFirst=False 

1419 ) 

1420 ), 

1421 expectedAllBiases, 

1422 ) 

1423 self.assertCountEqual( 

1424 list( 

1425 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1426 ), 

1427 expectedDeduplicatedBiases, 

1428 ) 

1429 # Materialize the dataset queries, too. 

1430 with subsetDataIds.findDatasets( 

1431 bias, collections=["imported_r", "imported_g"], findFirst=False 

1432 ).materialize() as biases: 

1433 self.assertCountEqual(list(biases), expectedAllBiases) 

1434 with subsetDataIds.findDatasets( 

1435 bias, collections=["imported_r", "imported_g"], findFirst=True 

1436 ).materialize() as biases: 

1437 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1438 # Materialize the original query, but none of the follow-up queries. 

1439 with dataIds.materialize() as dataIds: 

1440 self.assertEqual(dataIds.graph, expectedGraph) 

1441 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1442 self.assertCountEqual( 

1443 list( 

1444 dataIds.findDatasets( 

1445 flat, 

1446 collections=["imported_r"], 

1447 ) 

1448 ), 

1449 expectedFlats, 

1450 ) 

1451 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1452 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1453 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1454 self.assertCountEqual( 

1455 list( 

1456 subsetDataIds.findDatasets( 

1457 bias, collections=["imported_r", "imported_g"], findFirst=False 

1458 ) 

1459 ), 

1460 expectedAllBiases, 

1461 ) 

1462 self.assertCountEqual( 

1463 list( 

1464 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1465 ), 

1466 expectedDeduplicatedBiases, 

1467 ) 

1468 # Materialize just the bias dataset queries. 

1469 with subsetDataIds.findDatasets( 

1470 bias, collections=["imported_r", "imported_g"], findFirst=False 

1471 ).materialize() as biases: 

1472 self.assertCountEqual(list(biases), expectedAllBiases) 

1473 with subsetDataIds.findDatasets( 

1474 bias, collections=["imported_r", "imported_g"], findFirst=True 

1475 ).materialize() as biases: 

1476 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1477 # Materialize the subset data ID query, but not the dataset 

1478 # queries. 

1479 with subsetDataIds.materialize() as subsetDataIds: 

1480 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1481 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1482 self.assertCountEqual( 

1483 list( 

1484 subsetDataIds.findDatasets( 

1485 bias, collections=["imported_r", "imported_g"], findFirst=False 

1486 ) 

1487 ), 

1488 expectedAllBiases, 

1489 ) 

1490 self.assertCountEqual( 

1491 list( 

1492 subsetDataIds.findDatasets( 

1493 bias, collections=["imported_r", "imported_g"], findFirst=True 

1494 ) 

1495 ), 

1496 expectedDeduplicatedBiases, 

1497 ) 

1498 # Materialize the bias dataset queries, too, so now we're 

1499 # materializing every single step. 

1500 with subsetDataIds.findDatasets( 

1501 bias, collections=["imported_r", "imported_g"], findFirst=False 

1502 ).materialize() as biases: 

1503 self.assertCountEqual(list(biases), expectedAllBiases) 

1504 with subsetDataIds.findDatasets( 

1505 bias, collections=["imported_r", "imported_g"], findFirst=True 

1506 ).materialize() as biases: 

1507 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1508 
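A note on the materialize() pattern used throughout this test: it is a context manager that snapshots the current results into a temporary table, so follow-up queries inside the block join against that table instead of re-executing the original query, and the table is dropped on exit. Condensed usage, following the calls above:

data_ids = registry.queryDataIds(["detector", "physical_filter"], instrument="Cam1")
with data_ids.materialize() as materialized:
    # Follow-up queries inside the block reuse the temporary table.
    biases = list(
        materialized.findDatasets("bias", collections=["imported_g"], findFirst=True)
    )
# On exit the temporary table is gone; `data_ids` can still be used lazily.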

1509 def testEmptyDimensionsQueries(self): 

1510 """Test Query and QueryResults objects in the case where there are no 

1511 dimensions. 

1512 """ 

1513 # Set up test data: one dataset type, two runs, one dataset in each. 

1514 registry = self.makeRegistry() 

1515 self.loadData(registry, "base.yaml") 

1516 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1517 registry.registerDatasetType(schema) 

1518 dataId = DataCoordinate.makeEmpty(registry.dimensions) 

1519 run1 = "run1" 

1520 run2 = "run2" 

1521 registry.registerRun(run1) 

1522 registry.registerRun(run2) 

1523 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1524 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1525 # Query directly for both datasets together, and then for each one at a time.

1526 self.checkQueryResults( 

1527 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1528 ) 

1529 self.checkQueryResults( 

1530 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1531 [dataset1], 

1532 ) 

1533 self.checkQueryResults( 

1534 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1535 [dataset2], 

1536 ) 

1537 # Query for data IDs with no dimensions. 

1538 dataIds = registry.queryDataIds([]) 

1539 self.checkQueryResults(dataIds, [dataId]) 

1540 # Use queried data IDs to find the datasets. 

1541 self.checkQueryResults( 

1542 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1543 [dataset1, dataset2], 

1544 ) 

1545 self.checkQueryResults( 

1546 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1547 [dataset1], 

1548 ) 

1549 self.checkQueryResults( 

1550 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1551 [dataset2], 

1552 ) 

1553 # Now materialize the data ID query results and repeat those tests. 

1554 with dataIds.materialize() as dataIds: 

1555 self.checkQueryResults(dataIds, [dataId]) 

1556 self.checkQueryResults( 

1557 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1558 [dataset1], 

1559 ) 

1560 self.checkQueryResults( 

1561 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1562 [dataset2], 

1563 ) 

1564 # Query for non-empty data IDs, then subset that to get the empty one. 

1565 # Repeat the above tests starting from that. 

1566 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1567 self.checkQueryResults(dataIds, [dataId]) 

1568 self.checkQueryResults( 

1569 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1570 [dataset1, dataset2], 

1571 ) 

1572 self.checkQueryResults( 

1573 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1574 [dataset1], 

1575 ) 

1576 self.checkQueryResults( 

1577 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1578 [dataset2], 

1579 ) 

1580 with dataIds.materialize() as dataIds: 

1581 self.checkQueryResults(dataIds, [dataId]) 

1582 self.checkQueryResults( 

1583 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1584 [dataset1, dataset2], 

1585 ) 

1586 self.checkQueryResults( 

1587 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1588 [dataset1], 

1589 ) 

1590 self.checkQueryResults( 

1591 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1592 [dataset2], 

1593 ) 

1594 # Query for non-empty data IDs, then materialize, then subset to get 

1595 # the empty one. Repeat again. 

1596 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1597 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1598 self.checkQueryResults(dataIds, [dataId]) 

1599 self.checkQueryResults( 

1600 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1601 [dataset1, dataset2], 

1602 ) 

1603 self.checkQueryResults( 

1604 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1605 [dataset1], 

1606 ) 

1607 self.checkQueryResults( 

1608 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1609 [dataset2], 

1610 ) 

1611 with dataIds.materialize() as dataIds: 

1612 self.checkQueryResults(dataIds, [dataId]) 

1613 self.checkQueryResults( 

1614 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1615 [dataset1, dataset2], 

1616 ) 

1617 self.checkQueryResults( 

1618 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1619 [dataset1], 

1620 ) 

1621 self.checkQueryResults( 

1622 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1623 [dataset2], 

1624 ) 

1625 

1626 def testDimensionDataModifications(self): 

1627 """Test that modifying dimension records via: 

1628 syncDimensionData(..., update=True) and 

1629 insertDimensionData(..., replace=True) works as expected, even in the 

1630 presence of datasets using those dimensions and spatial overlap 

1631 relationships. 

1632 """ 

1633 

1634 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1635 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1636 for begin, end in ranges: 

1637 yield from range(begin, end) 

1638 

1639 def range_set_hull( 

1640 ranges: lsst.sphgeom.RangeSet, 

1641 pixelization: lsst.sphgeom.HtmPixelization, 

1642 ) -> lsst.sphgeom.ConvexPolygon: 

1643 """Create a ConvexPolygon hull of the region defined by a set of 

1644 HTM pixelization index ranges. 

1645 """ 

1646 points = [] 

1647 for index in unpack_range_set(ranges): 

1648 points.extend(pixelization.triangle(index).getVertices()) 

1649 return lsst.sphgeom.ConvexPolygon(points) 

1650 
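# For example, RangeSet(12288).scaled(4) covers the half-open range
# [49152, 49156), so unpack_range_set yields 49152, 49153, 49154 and
# 49155: the four level-6 children of the level-5 trixel 12288 used below.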

1651 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1652 # and four child regions (the trixels within the parent at the next 

1653 # level). We'll use the parent as a tract/visit region and the children

1654 # as its patch/visit_detector regions. 

1655 registry = self.makeRegistry() 

1656 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1657 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1658 index = 12288 

1659 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 

1660 assert htm6.universe().contains(child_ranges_small) 

1661 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)] 

1662 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1663 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1664 ) 

1665 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1666 # Make a larger version of each child region, defined to be the set of 

1667 # htm6 trixels that overlap the original's bounding circle. Make a new 

1668 # parent that's the convex hull of the new children. 

1669 child_regions_large = [ 

1670 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1671 ] 

1672 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small)) 

1673 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1674 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1675 ) 

1676 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1677 assert parent_region_large.contains(parent_region_small) 

1678 assert not parent_region_small.contains(parent_region_large) 

1679 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1680 # Find some commonSkyPix indices that overlap the large regions but do

1681 # not overlap the small regions. We use commonSkyPix here to make sure the

1682 # real tests later involve what's in the database, not just post-query 

1683 # region filtering. 

1684 child_difference_indices = [] 

1685 for large, small in zip(child_regions_large, child_regions_small): 

1686 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1687 assert difference, "if this is empty, we can't test anything useful with these regions" 

1688 assert all( 

1689 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1690 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1691 for d in difference 

1692 ) 

1693 child_difference_indices.append(difference) 

1694 parent_difference_indices = list( 

1695 unpack_range_set( 

1696 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1697 ) 

1698 ) 

1699 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1700 assert all( 

1701 ( 

1702 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1703 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1704 ) 

1705 for d in parent_difference_indices 

1706 ) 

1707 # Now that we've finally got those regions, we'll insert the large ones 

1708 # as tract/patch dimension records. 

1709 skymap_name = "testing_v1" 

1710 registry.insertDimensionData( 

1711 "skymap", 

1712 { 

1713 "name": skymap_name, 

1714 "hash": bytes([42]), 

1715 "tract_max": 1, 

1716 "patch_nx_max": 2, 

1717 "patch_ny_max": 2, 

1718 }, 

1719 ) 

1720 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

1721 registry.insertDimensionData( 

1722 "patch", 

1723 *[ 

1724 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1725 for n, c in enumerate(child_regions_large) 

1726 ], 

1727 ) 

1728 # Add a dataset that uses these dimensions to make sure that modifying

1729 # them doesn't disrupt foreign keys (need to make sure DB doesn't 

1730 # implement insert with replace=True as delete-then-insert). 

1731 dataset_type = DatasetType( 

1732 "coadd", 

1733 dimensions=["tract", "patch"], 

1734 universe=registry.dimensions, 

1735 storageClass="Exposure", 

1736 ) 

1737 registry.registerDatasetType(dataset_type) 

1738 registry.registerCollection("the_run", CollectionType.RUN) 

1739 registry.insertDatasets( 

1740 dataset_type, 

1741 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

1742 run="the_run", 

1743 ) 

1744 # Query for tracts and patches that overlap some "difference"

1745 # commonSkyPix pixels; there should be overlaps, because the database has

1746 # the "large" suite of regions. 

1747 self.assertEqual( 

1748 {0}, 

1749 { 

1750 data_id["tract"] 

1751 for data_id in registry.queryDataIds( 

1752 ["tract"], 

1753 skymap=skymap_name, 

1754 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1755 ) 

1756 }, 

1757 ) 

1758 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1759 self.assertIn( 

1760 patch_id, 

1761 { 

1762 data_id["patch"] 

1763 for data_id in registry.queryDataIds( 

1764 ["patch"], 

1765 skymap=skymap_name, 

1766 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1767 ) 

1768 }, 

1769 ) 

1770 # Use sync to update the tract region and insert to update the patch 

1771 # regions, to the "small" suite. 

1772 updated = registry.syncDimensionData( 

1773 "tract", 

1774 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

1775 update=True, 

1776 ) 

1777 self.assertEqual(updated, {"region": parent_region_large}) 

1778 registry.insertDimensionData( 

1779 "patch", 

1780 *[ 

1781 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1782 for n, c in enumerate(child_regions_small) 

1783 ], 

1784 replace=True, 

1785 ) 

1786 # Query again; there now should be no such overlaps, because the 

1787 # database has the "small" suite of regions. 

1788 self.assertFalse( 

1789 set( 

1790 registry.queryDataIds( 

1791 ["tract"], 

1792 skymap=skymap_name, 

1793 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1794 ) 

1795 ) 

1796 ) 

1797 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1798 self.assertNotIn( 

1799 patch_id, 

1800 { 

1801 data_id["patch"] 

1802 for data_id in registry.queryDataIds( 

1803 ["patch"], 

1804 skymap=skymap_name, 

1805 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1806 ) 

1807 }, 

1808 ) 

1809 # Update back to the large regions and query one more time. 

1810 updated = registry.syncDimensionData( 

1811 "tract", 

1812 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

1813 update=True, 

1814 ) 

1815 self.assertEqual(updated, {"region": parent_region_small}) 

1816 registry.insertDimensionData( 

1817 "patch", 

1818 *[ 

1819 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1820 for n, c in enumerate(child_regions_large) 

1821 ], 

1822 replace=True, 

1823 ) 

1824 self.assertEqual( 

1825 {0}, 

1826 { 

1827 data_id["tract"] 

1828 for data_id in registry.queryDataIds( 

1829 ["tract"], 

1830 skymap=skymap_name, 

1831 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1832 ) 

1833 }, 

1834 ) 

1835 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1836 self.assertIn( 

1837 patch_id, 

1838 { 

1839 data_id["patch"] 

1840 for data_id in registry.queryDataIds( 

1841 ["patch"], 

1842 skymap=skymap_name, 

1843 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1844 ) 

1845 }, 

1846 ) 

1847 
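One return-value detail the assertions above depend on: syncDimensionData(..., update=True) reports what it changed as a mapping from field name to old value, which makes the region swaps auditable. Minimal usage, reusing names from the test above:

updated = registry.syncDimensionData(
    "tract",
    {"skymap": skymap_name, "id": 0, "region": parent_region_small},
    update=True,
)
if updated:
    old_region = updated["region"]  # the value that was just replaced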

1848 def testCalibrationCollections(self): 

1849 """Test operations on `~CollectionType.CALIBRATION` collections, 

1850 including `Registry.certify`, `Registry.decertify`, and 

1851 `Registry.findDataset`. 

1852 """ 

1853 # Setup - make a Registry, fill it with some datasets in 

1854 # non-calibration collections. 

1855 registry = self.makeRegistry() 

1856 self.loadData(registry, "base.yaml") 

1857 self.loadData(registry, "datasets.yaml") 

1858 # Set up some timestamps. 

1859 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

1860 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

1861 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

1862 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

1863 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

1864 allTimespans = [ 

1865 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

1866 ] 

1867 # Get references to some datasets. 

1868 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

1869 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

1870 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

1871 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

1872 # Register the main calibration collection we'll be working with. 

1873 collection = "Cam1/calibs/default" 

1874 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

1875 # Cannot associate into a calibration collection (no timespan). 

1876 with self.assertRaises(TypeError): 

1877 registry.associate(collection, [bias2a]) 

1878 # Certify 2a dataset with [t2, t4) validity. 

1879 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

1880 # We should not be able to certify 2b with anything overlapping that 

1881 # window. 

1882 with self.assertRaises(ConflictingDefinitionError): 

1883 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

1884 with self.assertRaises(ConflictingDefinitionError): 

1885 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

1886 with self.assertRaises(ConflictingDefinitionError): 

1887 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

1888 with self.assertRaises(ConflictingDefinitionError): 

1889 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

1890 with self.assertRaises(ConflictingDefinitionError): 

1891 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

1892 with self.assertRaises(ConflictingDefinitionError): 

1893 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

1894 with self.assertRaises(ConflictingDefinitionError): 

1895 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

1896 with self.assertRaises(ConflictingDefinitionError): 

1897 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

1898 # We should be able to certify 3a with a range overlapping that window, 

1899 # because it's for a different detector. 

1900 # We'll certify 3a over [t1, t3). 

1901 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

1902 # Now we'll certify 2b and 3b together over [t4, ∞). 

1903 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

1904 

1905 # Fetch all associations and check that they are what we expect. 

1906 self.assertCountEqual( 

1907 list( 

1908 registry.queryDatasetAssociations( 

1909 "bias", 

1910 collections=[collection, "imported_g", "imported_r"], 

1911 ) 

1912 ), 

1913 [ 

1914 DatasetAssociation( 

1915 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1916 collection="imported_g", 

1917 timespan=None, 

1918 ), 

1919 DatasetAssociation( 

1920 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1921 collection="imported_r", 

1922 timespan=None, 

1923 ), 

1924 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

1925 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

1926 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

1927 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

1928 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

1929 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

1930 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

1931 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

1932 ], 

1933 ) 

1934 

1935 class Ambiguous: 

1936 """Tag class to denote lookups that should be ambiguous.""" 

1937 

1938 pass 

1939 

1940 def assertLookup( 

1941 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]] 

1942 ) -> None: 

1943 """Local function that asserts that a bias lookup returns the given 

1944 expected result. 

1945 """ 

1946 if expected is Ambiguous: 

1947 with self.assertRaises(RuntimeError): 

1948 registry.findDataset( 

1949 "bias", 

1950 collections=collection, 

1951 instrument="Cam1", 

1952 detector=detector, 

1953 timespan=timespan, 

1954 ) 

1955 else: 

1956 self.assertEqual( 

1957 expected, 

1958 registry.findDataset( 

1959 "bias", 

1960 collections=collection, 

1961 instrument="Cam1", 

1962 detector=detector, 

1963 timespan=timespan, 

1964 ), 

1965 ) 

1966 

1967 # Systematically test lookups against expected results. 

1968 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

1969 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

1970 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

1971 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

1972 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

1973 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

1974 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

1975 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

1976 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

1977 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

1978 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

1979 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

1980 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

1981 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

1982 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

1983 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

1984 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

1985 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

1986 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

1987 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

1988 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

1989 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

1990 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

1991 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

1992 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

1993 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

1994 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

1995 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

1996 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

1997 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

1998 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

1999 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2000 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2001 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2002 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2003 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2004 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2005 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2006 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2007 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2008 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2009 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2010 

2011 # Decertify [t3, t5) for all data IDs, and do test lookups again. 

2012 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2013 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2014 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2015 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2016 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2017 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2018 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2019 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2020 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2021 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2022 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2023 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2024 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2025 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2026 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2027 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2028 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2029 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2030 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2031 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2032 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2033 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2034 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2035 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2036 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2037 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2038 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2039 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2040 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2041 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2042 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2043 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2044 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2045 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2046 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2047 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2048 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2049 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2050 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2051 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2052 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2053 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2054 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2055 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2056 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2057 

2058 # Decertify everything, this time with explicit data IDs, then check 

2059 # that no lookups succeed. 

2060 registry.decertify( 

2061 collection, 

2062 "bias", 

2063 Timespan(None, None), 

2064 dataIds=[ 

2065 dict(instrument="Cam1", detector=2), 

2066 dict(instrument="Cam1", detector=3), 

2067 ], 

2068 ) 

2069 for detector in (2, 3): 

2070 for timespan in allTimespans: 

2071 assertLookup(detector=detector, timespan=timespan, expected=None) 

2072 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2073 # those. 

2074 registry.certify( 

2075 collection, 

2076 [bias2a, bias3a], 

2077 Timespan(None, None), 

2078 ) 

2079 for timespan in allTimespans: 

2080 assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2081 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2082 # Decertify just bias2 over [t2, t4). 

2083 # This should split a single certification row into two (and leave the 

2084 # other existing row, for bias3a, alone). 

2085 registry.decertify( 

2086 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2087 ) 

2088 for timespan in allTimespans: 

2089 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2090 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2091 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2092 if overlapsBefore and overlapsAfter: 

2093 expected = Ambiguous 

2094 elif overlapsBefore or overlapsAfter: 

2095 expected = bias2a 

2096 else: 

2097 expected = None 

2098 assertLookup(detector=2, timespan=timespan, expected=expected) 

2099 
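The closing loop above derives every expected lookup from interval logic alone: after decertifying [t2, t4), bias2a survives as two certification rows, [None, t2) and [t4, None). A query timespan overlapping both rows is ambiguous, overlapping one row finds bias2a, and overlapping neither finds nothing. That predicate in isolation, using the same Timespan API:

def predict_bias2a(query_timespan):
    """Predict the detector-2 lookup after decertifying [t2, t4)."""
    before = query_timespan.overlaps(Timespan(None, t2))  # left surviving row
    after = query_timespan.overlaps(Timespan(t4, None))   # right surviving row
    if before and after:
        return Ambiguous  # two rows match: findDataset raises
    if before or after:
        return bias2a     # exactly one row matches
    return None           # nothing certified for this timespan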

2100 def testSkipCalibs(self): 

2101 """Test how queries handle skipping of calibration collections.""" 

2102 registry = self.makeRegistry() 

2103 self.loadData(registry, "base.yaml") 

2104 self.loadData(registry, "datasets.yaml") 

2105 

2106 coll_calib = "Cam1/calibs/default" 

2107 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2108 

2109 # Add all biases to the calibration collection. 

2110 # Without this, the logic that prunes dataset subqueries based on 

2111 # datasetType-collection summary information will fire before the logic 

2112 # we want to test below. This is a good thing (it avoids the dreaded 

2113 # NotImplementedError a bit more often) everywhere but here. 

2114 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2115 

2116 coll_list = [coll_calib, "imported_g", "imported_r"] 

2117 chain = "Cam1/chain" 

2118 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2119 registry.setCollectionChain(chain, coll_list) 

2120 

2121 # explicit list will raise if findFirst=True or there are temporal 

2122 # dimensions 

2123 with self.assertRaises(NotImplementedError): 

2124 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2125 with self.assertRaises(NotImplementedError): 

2126 registry.queryDataIds( 

2127 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2128 ).count() 

2129 

2130 # chain will skip 

2131 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2132 self.assertGreater(len(datasets), 0) 

2133 

2134 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2135 self.assertGreater(len(dataIds), 0) 

2136 

2137 # glob will skip too 

2138 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2139 self.assertGreater(len(datasets), 0) 

2140 

2141 # regular expression will skip too 

2142 pattern = re.compile(".*") 

2143 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2144 self.assertGreater(len(datasets), 0) 

2145 

2146 # ellipsis should work as usual 

2147 datasets = list(registry.queryDatasets("bias", collections=...)) 

2148 self.assertGreater(len(datasets), 0) 

2149 

2150 # a few tests with findFirst

2151 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2152 self.assertGreater(len(datasets), 0) 

2153 
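Summarizing the behavior above: a calibration collection only raises NotImplementedError when the query would have to search it deterministically (an explicit collection list with findFirst=True, or temporal dimensions in the result); chained, glob, regex, and ellipsis searches skip it instead. In usage terms, reusing names from the test above:

try:
    registry.queryDatasets("bias", collections=coll_list, findFirst=True)
except NotImplementedError:
    pass  # an explicit search path cannot skip the calibration collection

# Wildcard-like searches skip the calibration collection and still succeed.
assert list(registry.queryDatasets("bias", collections=chain))
assert list(registry.queryDatasets("bias", collections=re.compile(".*")))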

2154 def testIngestTimeQuery(self):

2155 """Test queries against the dataset ingest_date field."""

2156 registry = self.makeRegistry() 

2157 self.loadData(registry, "base.yaml") 

2158 dt0 = datetime.utcnow() 

2159 self.loadData(registry, "datasets.yaml") 

2160 dt1 = datetime.utcnow() 

2161 

2162 datasets = list(registry.queryDatasets(..., collections=...)) 

2163 len0 = len(datasets) 

2164 self.assertGreater(len0, 0) 

2165 

2166 where = "ingest_date > T'2000-01-01'" 

2167 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2168 len1 = len(datasets) 

2169 self.assertEqual(len0, len1) 

2170 

2171 # no one will ever use this piece of software in 30 years 

2172 where = "ingest_date > T'2050-01-01'" 

2173 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2174 len2 = len(datasets) 

2175 self.assertEqual(len2, 0) 

2176 

2177 # Check more exact timing to make sure there is no 37-second offset

2178 # (after fixing DM-30124). SQLite time precision is 1 second, make 

2179 # sure that we don't test with higher precision. 

2180 tests = [ 

2181 # format: (timestamp, operator, expected_len) 

2182 (dt0 - timedelta(seconds=1), ">", len0), 

2183 (dt0 - timedelta(seconds=1), "<", 0), 

2184 (dt1 + timedelta(seconds=1), "<", len0), 

2185 (dt1 + timedelta(seconds=1), ">", 0), 

2186 ] 

2187 for dt, op, expect_len in tests: 

2188 dt_str = dt.isoformat(sep=" ") 

2189 

2190 where = f"ingest_date {op} T'{dt_str}'" 

2191 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2192 self.assertEqual(len(datasets), expect_len) 

2193 

2194 # same with bind using datetime or astropy Time 

2195 where = f"ingest_date {op} ingest_time" 

2196 datasets = list( 

2197 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2198 ) 

2199 self.assertEqual(len(datasets), expect_len) 

2200 

2201 dt_astropy = astropy.time.Time(dt, format="datetime") 

2202 datasets = list( 

2203 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2204 ) 

2205 self.assertEqual(len(datasets), expect_len) 

2206 

2207 def testTimespanQueries(self): 

2208 """Test query expressions involving timespans.""" 

2209 registry = self.makeRegistry() 

2210 self.loadData(registry, "hsc-rc2-subset.yaml") 

2211 # All visits in the database; mapping from visit ID to timespan.

2212 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2213 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2214 # visit IDs are monotonically increasing).

2215 ids = sorted(visits.keys()) 

2216 self.assertGreater(len(ids), 20) 

2217 # Pick some quasi-random indexes into `ids` to play with. 

2218 i1 = int(len(ids) * 0.1) 

2219 i2 = int(len(ids) * 0.3) 

2220 i3 = int(len(ids) * 0.6) 

2221 i4 = int(len(ids) * 0.8) 

2222 # Extract some times from those: just before the beginning of i1 (which 

2223 # should be after the end of the previous visit), exactly the

2224 # beginning of i2, just after the beginning of i3 (and before its end), 

2225 # and the exact end of i4. 

2226 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2227 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2228 t2 = visits[ids[i2]].begin 

2229 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2230 self.assertLess(t3, visits[ids[i3]].end) 

2231 t4 = visits[ids[i4]].end 

2232 # Make sure those are actually in order. 

2233 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2234 

2235 bind = { 

2236 "t1": t1, 

2237 "t2": t2, 

2238 "t3": t3, 

2239 "t4": t4, 

2240 "ts23": Timespan(t2, t3), 

2241 } 

2242 

2243 def query(where): 

2244 """Helper function that queries for visit data IDs and returns 

2245 results as a sorted, deduplicated list of visit IDs. 

2246 """ 

2247 return sorted( 

2248 { 

2249 dataId["visit"] 

2250 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2251 } 

2252 ) 

2253 

2254 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2255 # where they appear in the expression, and how we get the timespan into 

2256 # the expression. 

2257 

2258 # t1 is before the start of i1, so this should not include i1. 

2259 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2260 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2261 # should not include i2. 

2262 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2263 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2264 # t3 is in the middle of i3, so this should include i3. 

2265 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2266 # This one should not include i3, by the same reasoning.

2267 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2268 # t4 is exactly at the end of i4, so this should include i4. 

2269 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2270 # i4's upper bound of t4 is exclusive, so this should not include i4.

2271 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2272 

2273 # Now some timespan vs. time scalar queries. 

2274 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2275 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2276 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2277 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2278 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2279 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2280 

2281 # Empty timespans should not overlap anything. 

2282 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 

2283 
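The expression syntax exercised above offers three routes for getting a time bound into a query: a bind name, a T'...' literal in TAI ISO format, and NULL for an unbounded side; timespans combine with OVERLAPS, <, and > under half-open [begin, end) semantics. A compact usage sketch with the same bind values:

registry.queryDataIds(
    "visit",
    instrument="HSC",
    bind={"ts23": Timespan(t2, t3), "t1": t1},
    where="visit.timespan OVERLAPS ts23 AND visit.timespan > t1",
)
# Equivalent literal form for one bound, with NULL as an open upper bound:
registry.queryDataIds(
    "visit", instrument="HSC", where=f"visit.timespan OVERLAPS (T'{t2.tai.isot}', NULL)"
)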

2284 def testCollectionSummaries(self): 

2285 """Test recording and retrieval of collection summaries.""" 

2286 self.maxDiff = None 

2287 registry = self.makeRegistry() 

2288 # Importing datasets from yaml should go through the code path where 

2289 # we update collection summaries as we insert datasets. 

2290 self.loadData(registry, "base.yaml") 

2291 self.loadData(registry, "datasets.yaml") 

2292 flat = registry.getDatasetType("flat") 

2293 expected1 = CollectionSummary.makeEmpty(registry.dimensions) 

2294 expected1.datasetTypes.add(registry.getDatasetType("bias")) 

2295 expected1.datasetTypes.add(flat) 

2296 expected1.dimensions.update_extract( 

2297 DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions) 

2298 ) 

2299 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2300 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2301 # Create a chained collection with both of the imported runs; the 

2302 # summary should be the same, because it's a union with itself. 

2303 chain = "chain" 

2304 registry.registerCollection(chain, CollectionType.CHAINED) 

2305 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2306 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2307 # Associate flats only into a tagged collection and a calibration 

2308 # collection to check summaries of those. 

2309 tag = "tag" 

2310 registry.registerCollection(tag, CollectionType.TAGGED) 

2311 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2312 calibs = "calibs" 

2313 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2314 registry.certify( 

2315 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2316 ) 

2317 expected2 = expected1.copy() 

2318 expected2.datasetTypes.discard("bias") 

2319 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2320 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2321 # Explicitly calling Registry.refresh() should load those same 

2322 # summaries, via a totally different code path. 

2323 registry.refresh() 

2324 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2325 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2326 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2327 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2328 

2329 def testUnrelatedDimensionQueries(self): 

2330 """Test that WHERE expressions in queries can reference dimensions that 

2331 are not in the result set. 

2332 """ 

2333 registry = self.makeRegistry() 

2334 # There is no data to back this query, but it should still return 

2335 # zero records instead of raising. 

2336 self.assertFalse( 

2337 set( 

2338 registry.queryDataIds( 

2339 ["visit", "detector"], where="instrument='Cam1' AND skymap='not_here' AND tract=0" 

2340 ) 

2341 ), 

2342 ) 

2343 

2344 def testBindInQueryDatasets(self): 

2345 """Test that the bind parameter is correctly forwarded in 

2346 queryDatasets recursion. 

2347 """ 

2348 registry = self.makeRegistry() 

2349 # Import datasets from yaml so there are collections and flats to

2350 # query against below.

2351 self.loadData(registry, "base.yaml") 

2352 self.loadData(registry, "datasets.yaml") 

2353 self.assertEqual( 

2354 set(registry.queryDatasets("flat", band="r", collections=...)), 

2355 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2356 ) 

2357 

2358 def testQueryResultSummaries(self): 

2359 """Test summary methods like `count`, `any`, and `explain_no_results` 

2360 on `DataCoordinateQueryResults` and `DatasetQueryResults`.

2361 """ 

2362 registry = self.makeRegistry() 

2363 self.loadData(registry, "base.yaml") 

2364 self.loadData(registry, "datasets.yaml") 

2365 self.loadData(registry, "spatial.yaml") 

2366 # Default test dataset has two collections, each with both flats and 

2367 # biases. Add a new collection with only biases. 

2368 registry.registerCollection("biases", CollectionType.TAGGED) 

2369 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2370 # First query yields two results, and involves no postprocessing. 

2371 query1 = registry.queryDataIds(["physical_filter"], band="r") 

2372 self.assertTrue(query1.any(execute=False, exact=False)) 

2373 self.assertTrue(query1.any(execute=True, exact=False)) 

2374 self.assertTrue(query1.any(execute=True, exact=True)) 

2375 self.assertEqual(query1.count(exact=False), 2) 

2376 self.assertEqual(query1.count(exact=True), 2) 

2377 self.assertFalse(list(query1.explain_no_results())) 

2378 # Second query should yield no results, but this isn't detectable 

2379 # unless we actually run a query. 

2380 query2 = registry.queryDataIds(["physical_filter"], band="h") 

2381 self.assertTrue(query2.any(execute=False, exact=False)) 

2382 self.assertFalse(query2.any(execute=True, exact=False)) 

2383 self.assertFalse(query2.any(execute=True, exact=True)) 

2384 self.assertEqual(query2.count(exact=False), 0) 

2385 self.assertEqual(query2.count(exact=True), 0) 

2386 self.assertFalse(list(query2.explain_no_results())) 

2387 # These queries yield no results due to various problems that can be 

2388 # spotted prior to execution, yielding helpful diagnostics. 

2389 for query, snippets in [ 

2390 ( 

2391 # Dataset type name doesn't match any existing dataset types. 

2392 registry.queryDatasets("nonexistent", collections=...), 

2393 ["nonexistent"], 

2394 ), 

2395 ( 

2396 # Dataset type name doesn't match any existing dataset types. 

2397 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...), 

2398 ["nonexistent"], 

2399 ), 

2400 ( 

2401 # Dataset type object isn't registered. 

2402 registry.queryDatasets( 

2403 DatasetType( 

2404 "nonexistent", 

2405 dimensions=["instrument"], 

2406 universe=registry.dimensions, 

2407 storageClass="Image", 

2408 ), 

2409 collections=..., 

2410 ), 

2411 ["nonexistent"], 

2412 ), 

2413 ( 

2414 # No datasets of this type in this collection. 

2415 registry.queryDatasets("flat", collections=["biases"]), 

2416 ["flat", "biases"], 

2417 ), 

2418 ( 

2419 # No collections matching at all. 

2420 registry.queryDatasets("flat", collections=re.compile("potato.+")), 

2421 ["potato"], 

2422 ), 

2423 ]: 

2424 

2425 self.assertFalse(query.any(execute=False, exact=False)) 

2426 self.assertFalse(query.any(execute=True, exact=False)) 

2427 self.assertFalse(query.any(execute=True, exact=True)) 

2428 self.assertEqual(query.count(exact=False), 0) 

2429 self.assertEqual(query.count(exact=True), 0) 

2430 messages = list(query.explain_no_results()) 

2431 self.assertTrue(messages) 

2432 # Want all expected snippets to appear in at least one message. 

2433 self.assertTrue( 

2434 any( 

2435 all(snippet in message for snippet in snippets) for message in messages 

2436 ), 

2437 messages, 

2438 ) 

2439 

2440 # These queries yield no results due to problems that can be identified 

2441 # by cheap follow-up queries, yielding helpful diagnostics. 

2442 for query, snippets in [ 

2443 ( 

2444 # No records for one of the involved dimensions. 

2445 registry.queryDataIds(["subfilter"]), 

2446 ["dimension records", "subfilter"], 

2447 ), 

2448 ]: 

2449 self.assertFalse(query.any(execute=True, exact=False)) 

2450 self.assertFalse(query.any(execute=True, exact=True)) 

2451 self.assertEqual(query.count(exact=True), 0) 

2452 messages = list(query.explain_no_results()) 

2453 self.assertTrue(messages) 

2454 # Want all expected snippets to appear in at least one message. 

2455 self.assertTrue( 

2456 any( 

2457 all(snippet in message for snippet in snippets) for message in messages 

2458 ), 

2459 messages, 

2460 ) 

2461 

2462 # This query yields four overlaps in the database, but one is filtered 

2463 # out in postprocessing. The count queries aren't accurate because 

2464 # they don't account for duplication that happens due to an internal 

2465 # join against commonSkyPix. 

2466 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1") 

2467 self.assertEqual( 

2468 { 

2469 DataCoordinate.standardize( 

2470 instrument="Cam1", 

2471 skymap="SkyMap1", 

2472 visit=v, 

2473 tract=t, 

2474 universe=registry.dimensions, 

2475 ) 

2476 for v, t in [(1, 0), (2, 0), (2, 1)] 

2477 }, 

2478 set(query3), 

2479 ) 

2480 self.assertTrue(query3.any(execute=False, exact=False)) 

2481 self.assertTrue(query3.any(execute=True, exact=False)) 

2482 self.assertTrue(query3.any(execute=True, exact=True)) 

2483 self.assertGreaterEqual(query3.count(exact=False), 4) 

2484 self.assertGreaterEqual(query3.count(exact=True), 3) 

2485 self.assertFalse(list(query3.explain_no_results())) 
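# A short illustration of the count semantics asserted above, under the
# same spatial test data: exact=False counts raw database rows
# (commonSkyPix duplicates included), exact=True applies the region
# filter but can still include duplicates, so only deduplicated
# iteration yields the true number of data IDs.
rows = query3.count(exact=False)  # >= 4: raw join rows
filtered = query3.count(exact=True)  # >= 3: after region filtering
unique = len(set(query3))  # == 3: distinct data IDs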

2486 # This query yields overlaps in the database, but all are filtered 

2487 # out in postprocessing. The count queries again aren't very useful. 

2488 # We have to use `where=` here to avoid an optimization that 

2489 # (currently) skips the spatial postprocess-filtering because it 

2490 # recognizes that no spatial join is necessary. That's not ideal, but 

2491 # fixing it is out of scope for this ticket. 

2492 query4 = registry.queryDataIds( 

2493 ["visit", "tract"], 

2494 instrument="Cam1", 

2495 skymap="SkyMap1", 

2496 where="visit=1 AND detector=1 AND tract=0 AND patch=4", 

2497 ) 

2498 self.assertFalse(set(query4)) 

2499 self.assertTrue(query4.any(execute=False, exact=False)) 

2500 self.assertTrue(query4.any(execute=True, exact=False)) 

2501 self.assertFalse(query4.any(execute=True, exact=True)) 

2502 self.assertGreaterEqual(query4.count(exact=False), 1) 

2503 self.assertEqual(query4.count(exact=True), 0) 

2504 messages = list(query4.explain_no_results()) 

2505 self.assertTrue(messages) 

2506 self.assertTrue(any("regions did not overlap" in message for message in messages)) 

2507 
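# A minimal sketch of the diagnostic ladder these assertions walk,
# assuming `query` is any query-results object from this Registry:
# cheapest check first, then actual execution, then the exact check,
# with explain_no_results() as the human-readable fallback (it yields
# nothing when results exist).
if not query.any(execute=False, exact=False):
    print("rejected before execution")
elif not query.any(execute=True, exact=False):
    print("no rows returned by the database")
elif not query.any(execute=True, exact=True):
    print("all rows removed in postprocessing")
for message in query.explain_no_results():
    print(message)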

2508 def testQueryDataIdsOrderBy(self): 

2509 """Test order_by and limit on result returned by queryDataIds().""" 

2510 registry = self.makeRegistry() 

2511 self.loadData(registry, "base.yaml") 

2512 self.loadData(registry, "datasets.yaml") 

2513 self.loadData(registry, "spatial.yaml") 

2514 

2515 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None): 

2516 return registry.queryDataIds( 

2517 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1" 

2518 ) 

2519 

2520 Test = namedtuple( 

2521 "testQueryDataIdsOrderByTest", 

2522 ("order_by", "keys", "result", "limit", "datasets", "collections"), 

2523 defaults=(None, None, None), 

2524 ) 

2525 

2526 test_data = ( 

2527 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2528 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))), 

2529 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))), 

2530 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))), 

2531 Test( 

2532 "tract.id,visit.id", 

2533 "tract,visit", 

2534 ((0, 1), (0, 1), (0, 2)), 

2535 limit=(3,), 

2536 ), 

2537 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)), 

2538 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)), 

2539 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)), 

2540 Test( 

2541 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)) 

2542 ), 

2543 Test( 

2544 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2)) 

2545 ), 

2546 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2547 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))), 

2548 Test( 

2549 "tract,-timespan.begin,timespan.end", 

2550 "tract,visit", 

2551 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)), 

2552 ), 

2553 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()), 

2554 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()), 

2555 Test( 

2556 "tract,detector", 

2557 "tract,detector", 

2558 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2559 datasets="flat", 

2560 collections="imported_r", 

2561 ), 

2562 Test( 

2563 "tract,detector.full_name", 

2564 "tract,detector", 

2565 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2566 datasets="flat", 

2567 collections="imported_r", 

2568 ), 

2569 Test( 

2570 "tract,detector.raft,detector.name_in_raft", 

2571 "tract,detector", 

2572 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)), 

2573 datasets="flat", 

2574 collections="imported_r", 

2575 ), 

2576 ) 

2577 
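# A compact example of the ORDER BY grammar the cases above exercise,
# against the same data: a leading "-" reverses the sort, bare names
# are dimension keys, and dotted names select an element's metadata
# field or timespan bound; in limit(), the second argument acts as an
# offset in the cases below.
ids = (
    do_query(("visit", "tract"))
    .order_by("tract", "-visit.timespan.begin")
    .limit(3, 1)  # at most 3 data IDs, skipping the first
)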

2578 for test in test_data: 

2579 order_by = test.order_by.split(",") 

2580 keys = test.keys.split(",") 

2581 query = do_query(keys, test.datasets, test.collections).order_by(*order_by) 

2582 if test.limit is not None: 

2583 query = query.limit(*test.limit) 

2584 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query) 

2585 self.assertEqual(dataIds, test.result) 

2586 

2587 # and materialize 

2588 query = do_query(keys).order_by(*order_by) 

2589 if test.limit is not None: 

2590 query = query.limit(*test.limit) 

2591 with query.materialize() as materialized: 

2592 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in materialized) 

2593 self.assertEqual(dataIds, test.result) 

2594 

2595 # errors in a name 

2596 for order_by in ("", "-"): 

2597 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2598 list(do_query().order_by(order_by)) 

2599 

2600 for order_by in ("undimension.name", "-undimension.name"): 

2601 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"): 

2602 list(do_query().order_by(order_by)) 

2603 

2604 for order_by in ("attract", "-attract"): 

2605 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"): 

2606 list(do_query().order_by(order_by)) 

2607 

2608 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"): 

2609 list(do_query(("exposure", "visit")).order_by("exposure_time")) 

2610 

2611 with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimesion"): 

2612 list(do_query(("exposure", "visit")).order_by("timespan.begin")) 

2613 

2614 with self.assertRaisesRegex( 

2615 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'" 

2616 ): 

2617 list(do_query(("tract")).order_by("timespan.begin")) 

2618 

2619 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"): 

2620 list(do_query(("tract")).order_by("tract.timespan.begin")) 

2621 

2622 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."): 

2623 list(do_query(("tract")).order_by("tract.name")) 

2624 

2625 def testQueryDimensionRecordsOrderBy(self): 

2626 """Test order_by and limit on result returned by 

2627 queryDimensionRecords(). 

2628 """ 

2629 registry = self.makeRegistry() 

2630 self.loadData(registry, "base.yaml") 

2631 self.loadData(registry, "datasets.yaml") 

2632 self.loadData(registry, "spatial.yaml") 

2633 

2634 def do_query(element, datasets=None, collections=None): 

2635 return registry.queryDimensionRecords( 

2636 element, instrument="Cam1", datasets=datasets, collections=collections 

2637 ) 

2638 

2639 query = do_query("detector") 

2640 self.assertEqual(len(list(query)), 4) 

2641 

2642 Test = namedtuple( 

2643 "testQueryDataIdsOrderByTest", 

2644 ("element", "order_by", "result", "limit", "datasets", "collections"), 

2645 defaults=(None, None, None), 

2646 ) 

2647 

2648 test_data = ( 

2649 Test("detector", "detector", (1, 2, 3, 4)), 

2650 Test("detector", "-detector", (4, 3, 2, 1)), 

2651 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)), 

2652 Test("detector", "-detector.purpose", (4,), limit=(1,)), 

2653 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)), 

2654 Test("visit", "visit", (1, 2)), 

2655 Test("visit", "-visit.id", (2, 1)), 

2656 Test("visit", "zenith_angle", (1, 2)), 

2657 Test("visit", "-visit.name", (2, 1)), 

2658 Test("visit", "day_obs,-timespan.begin", (2, 1)), 

2659 ) 

2660 

2661 for test in test_data: 

2662 order_by = test.order_by.split(",") 

2663 query = do_query(test.element).order_by(*order_by) 

2664 if test.limit is not None: 

2665 query = query.limit(*test.limit) 

2666 dataIds = tuple(rec.id for rec in query) 

2667 self.assertEqual(dataIds, test.result) 

2668 
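# The same grammar applies to dimension records; a sketch against the
# same data, where fields may be bare ("raft") or qualified
# ("detector.purpose") as long as they are unambiguous for the element:
recs = do_query("detector").order_by("raft", "-name_in_raft").limit(2)
assert tuple(rec.id for rec in recs) == (2, 1)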

2669 # errors in a name 

2670 for order_by in ("", "-"): 

2671 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"): 

2672 list(do_query("detector").order_by(order_by)) 

2673 

2674 for order_by in ("undimension.name", "-undimension.name"): 

2675 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"): 

2676 list(do_query("detector").order_by(order_by)) 

2677 

2678 for order_by in ("attract", "-attract"): 

2679 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."): 

2680 list(do_query("detector").order_by(order_by)) 

2681 

2682 def testDatasetConstrainedDimensionRecordQueries(self): 

2683 """Test that queryDimensionRecords works even when given a dataset 

2684 constraint whose dimensions extend beyond the requested dimension 

2685 element's. 

2686 """ 

2687 registry = self.makeRegistry() 

2688 self.loadData(registry, "base.yaml") 

2689 self.loadData(registry, "datasets.yaml") 

2690 # Query for physical_filter dimension records, using a dataset type 

2691 # whose dimensions include physical_filter as well as detector and band. 

2692 records = registry.queryDimensionRecords( 

2693 "physical_filter", 

2694 datasets=["flat"], 

2695 collections="imported_r", 

2696 ) 

2697 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
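# A usage sketch of the pattern verified above, assuming the same
# imported data: the "flat" constraint also involves detector and
# band, yet only physical_filter records come back.
for record in registry.queryDimensionRecords(
    "physical_filter", datasets=["flat"], collections="imported_r"
):
    print(record.name)  # Cam1-R1 and Cam1-R2 (order not guaranteed)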