# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    Timespan,
    ddl,
)
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ConflictingDefinitionError,
    InconsistentDataIdError,
    MissingCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum
from ..summaries import CollectionSummary

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class; if a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class; if a subclass provides a value for
    this member, it overrides the name specified in the default configuration
    (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create the `RegistryConfig` used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need the default configuration should just
        instantiate `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

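    # A minimal, hypothetical sketch of how a concrete subclass might wire
    # these hooks together.  The full manager path and the in-memory SQLite
    # URI are illustrative assumptions, not something this suite mandates:
    #
    #     class MyRegistryTests(RegistryTests, unittest.TestCase):
    #         datasetsManager = (
    #             "lsst.daf.butler.registry.datasets.byDimensions."
    #             "ByDimensionsDatasetRecordStorageManagerUUID"
    #         )
    #
    #         def makeRegistry(self) -> Registry:
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"
    #             return Registry.createFromConfig(config)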

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the `Registry` instance to be tested."""
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters.  SQLite documents the limit as 32k, but in
        # practice it appears to be much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, and the second has matching elements in different
        # batches (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.obs.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing same DatasetRef with different dataset ID is an error
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

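        # A note on the two ID-generation modes exercised below, as this test
        # understands them: DATAID_TYPE derives a deterministic UUID5 from
        # the dataset type and data ID only, so the same combination always
        # maps to the same ID regardless of run, while DATAID_TYPE_RUN also
        # folds the run name into the hash, so the same dataset type and
        # data ID can coexist in different runs with distinct IDs.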

        # Test for non-unique IDs, they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):

                # Use integer dataset ID to force UUID calculation in _import
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                (ref1,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)

    def testImportDatasetsInt(self):
        """Test for `Registry._importDatasets` with integer dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManager"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}
        dataset_id = 999999999

        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run=run)
        (ref1,) = registry._importDatasets([ref])
        # Should make new integer ID.
        self.assertNotEqual(ref1.id, ref.id)

        # Ingesting same dataId with different dataset ID is an error
        ref2 = ref1.unresolved().resolved(dataset_id, run=run)
        with self.assertRaises(ConflictingDefinitionError):
            registry._importDatasets([ref2])

        # Ingesting different dataId with the same dataset ID should work
        ref3 = DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

        ref3 = DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names,
        )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
        )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
        )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")},
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(
            registry.queryDatasets(
                "bias.wcs",
                collections=collection,
            )
        )
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2}, {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
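        # The effective search order for chain2 is therefore run2 first, then
        # (recursing into chain1) tag1 followed by run2 again.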

        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  It should be found when
        # searching chain2, which includes run2 both directly and at the end
        # of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])
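        # With flatten=True the child chain was expanded at assignment time,
        # so "outer" now records ["innermost"] directly; later edits to
        # "inner" would not propagate to "outer".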

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins
        to skymap.
        """
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # Dataset types.
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # Add pre-existing datasets.
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the different collections;
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dimension string works as well as a list of str.
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # With an empty expression.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions),
            )
            self.assertEqual(
                packer2.unpack(packer2.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer2.dimensions),
            )
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # Second collection.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # With two input collections.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # Limit to a single visit.
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # A more limiting expression, using link names instead of
        # Table.column.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(TypeError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(TypeError):
            registry.queryDataIds(dimensions, collections=run1)

        # An expression that excludes everything.
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter: it is not in the requested
        # dimensions, but it is a part of the full expression, so it should
        # work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8")))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # Dataset types.
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # Add pre-existing datasets.
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # With an empty expression.
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # Limit to 2 tracts and 2 patches.
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # Limit to a single filter.
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # The expression excludes everything; specifying a non-existent
        # skymap is not a fatal error, just an operator error.
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
        ).toSet()
        self.assertEqual(len(rows), 0)

1153 def testSpatialJoin(self): 

1154 """Test queries that involve spatial overlap joins.""" 

1155 registry = self.makeRegistry() 

1156 self.loadData(registry, "hsc-rc2-subset.yaml") 

1157 

1158 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of 

1159 # the TopologicalFamily they belong to. We'll relate all elements in 

1160 # each family to all of the elements in each other family. 

1161 families = defaultdict(set) 

1162 # Dictionary of {element.name: {dataId: region}}. 

1163 regions = {} 

1164 for element in registry.dimensions.getDatabaseElements(): 

1165 if element.spatial is not None: 

1166 families[element.spatial.name].add(element) 

1167 regions[element.name] = { 

1168 record.dataId: record.region for record in registry.queryDimensionRecords(element) 

1169 } 

1170 

1171 # If this check fails, it's not necessarily a problem - it may just be 

1172 # a reasonable change to the default dimension definitions - but the 

1173 # test below depends on there being more than one family to do anything 

1174 # useful. 

1175 self.assertEqual(len(families), 2) 

1176 

1177 # Overlap DatabaseDimensionElements with each other. 

1178 for family1, family2 in itertools.combinations(families, 2): 

1179 for element1, element2 in itertools.product(families[family1], families[family2]): 

1180 graph = DimensionGraph.union(element1.graph, element2.graph) 

1181 # Construct expected set of overlapping data IDs via a 

1182 # brute-force comparison of the regions we've already fetched. 

1183 expected = { 

1184 DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph) 

1185 for (dataId1, region1), (dataId2, region2) in itertools.product( 

1186 regions[element1.name].items(), regions[element2.name].items() 

1187 ) 

1188 if not region1.isDisjointFrom(region2) 

1189 } 

1190 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1191 queried = set(registry.queryDataIds(graph)) 

1192 self.assertEqual(expected, queried) 

1193 

1194 # Overlap each DatabaseDimensionElement with the commonSkyPix system. 

1195 commonSkyPix = registry.dimensions.commonSkyPix 

1196 for elementName, elementRegions in regions.items(): 

1197 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph) 

1198 expected = set() 

1199 for dataId, region in elementRegions.items(): 

1200 for begin, end in commonSkyPix.pixelization.envelope(region): 

1201 expected.update( 

1202 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph) 

1203 for index in range(begin, end) 

1204 ) 

1205 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.") 

1206 queried = set(registry.queryDataIds(graph)) 

1207 self.assertEqual(expected, queried) 
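# The exact equality above holds because Pixelization.envelope() returns
# a conservative RangeSet of pixel indices that may overlap the region,
# and (presumably) the database overlap rows are computed with the same
# envelope operation, so both sides agree even on false-positive overlaps
# near region boundaries.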

1208 

1209 def testAbstractQuery(self): 

1210 """Test that we can run a query that just lists the known 

1211 bands. This is tricky because band is 

1212 backed by a query against physical_filter. 

1213 """ 

1214 registry = self.makeRegistry() 

1215 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

1216 registry.insertDimensionData( 

1217 "physical_filter", 

1218 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1219 dict(instrument="DummyCam", name="dummy_i2", band="i"), 

1220 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1221 ) 
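# Both dummy_i and dummy_i2 map to band "i", so projecting onto band
# alone must deduplicate them into a single "i" row; the query below is
# expected to return exactly two data IDs, one per band.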

1222 rows = registry.queryDataIds(["band"]).toSet() 

1223 self.assertCountEqual( 

1224 rows, 

1225 [ 

1226 DataCoordinate.standardize(band="i", universe=registry.dimensions), 

1227 DataCoordinate.standardize(band="r", universe=registry.dimensions), 

1228 ], 

1229 ) 

1230 

1231 def testAttributeManager(self): 

1232 """Test basic functionality of attribute manager.""" 

1233 # Number of attributes with schema versions in a fresh database: 

1234 # 6 managers with 3 records per manager, plus config for dimensions. 

1235 VERSION_COUNT = 6 * 3 + 1 

1236 

1237 registry = self.makeRegistry() 

1238 attributes = registry._managers.attributes 

1239 

1240 # check what get() returns for non-existing key 

1241 self.assertIsNone(attributes.get("attr")) 

1242 self.assertEqual(attributes.get("attr", ""), "") 

1243 self.assertEqual(attributes.get("attr", "Value"), "Value") 

1244 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1245 

1246 # cannot store empty key or value 

1247 with self.assertRaises(ValueError): 

1248 attributes.set("", "value") 

1249 with self.assertRaises(ValueError): 

1250 attributes.set("attr", "") 

1251 

1252 # set value of non-existing key 

1253 attributes.set("attr", "value") 

1254 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1255 self.assertEqual(attributes.get("attr"), "value") 

1256 

1257 # update value of existing key 

1258 with self.assertRaises(ButlerAttributeExistsError): 

1259 attributes.set("attr", "value2") 

1260 

1261 attributes.set("attr", "value2", force=True) 

1262 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1) 

1263 self.assertEqual(attributes.get("attr"), "value2") 

1264 

1265 # delete existing key 

1266 self.assertTrue(attributes.delete("attr")) 

1267 self.assertEqual(len(list(attributes.items())), VERSION_COUNT) 

1268 

1269 # delete non-existing key 

1270 self.assertFalse(attributes.delete("non-attr")) 

1271 

1272 # store a bunch of keys and get the list back 

1273 data = [ 

1274 ("version.core", "1.2.3"), 

1275 ("version.dimensions", "3.2.1"), 

1276 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"), 

1277 ] 

1278 for key, value in data: 

1279 attributes.set(key, value) 

1280 items = dict(attributes.items()) 

1281 for key, value in data: 

1282 self.assertEqual(items[key], value) 

1283 

1284 def testQueryDatasetsDeduplication(self): 

1285 """Test that the findFirst option to queryDatasets selects datasets 

1286 from collections in the order given. 

1287 """ 

1288 registry = self.makeRegistry() 

1289 self.loadData(registry, "base.yaml") 

1290 self.loadData(registry, "datasets.yaml") 

1291 self.assertCountEqual( 

1292 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])), 

1293 [ 

1294 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1295 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1296 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1297 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1298 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1299 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1300 ], 

1301 ) 

1302 self.assertCountEqual( 

1303 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)), 

1304 [ 

1305 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1306 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"), 

1307 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"), 

1308 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1309 ], 

1310 ) 

1311 self.assertCountEqual( 

1312 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)), 

1313 [ 

1314 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1315 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"), 

1316 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"), 

1317 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1318 ], 

1319 ) 

1320 

1321 def testQueryResults(self): 

1322 """Test querying for data IDs and then manipulating the QueryResults 

1323 object returned to perform other queries. 

1324 """ 

1325 registry = self.makeRegistry() 

1326 self.loadData(registry, "base.yaml") 

1327 self.loadData(registry, "datasets.yaml") 

1328 bias = registry.getDatasetType("bias") 

1329 flat = registry.getDatasetType("flat") 

1330 # Obtain expected results from methods other than those we're testing 

1331 # here. That includes: 

1332 # - the dimensions of the data IDs we want to query: 

1333 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"]) 

1334 # - the dimensions of some other data IDs we'll extract from that: 

1335 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"]) 

1336 # - the data IDs we expect to obtain from the first queries: 

1337 expectedDataIds = DataCoordinateSet( 

1338 { 

1339 DataCoordinate.standardize( 

1340 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions 

1341 ) 

1342 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"}) 

1343 }, 

1344 graph=expectedGraph, 

1345 hasFull=False, 

1346 hasRecords=False, 

1347 ) 

1348 # - the flat datasets we expect to find from those data IDs, in just 

1349 # one collection (so deduplication is irrelevant): 

1350 expectedFlats = [ 

1351 registry.findDataset( 

1352 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r" 

1353 ), 

1354 registry.findDataset( 

1355 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r" 

1356 ), 

1357 registry.findDataset( 

1358 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r" 

1359 ), 

1360 ] 

1361 # - the data IDs we expect to extract from that: 

1362 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph) 

1363 # - the bias datasets we expect to find from those data IDs, after we 

1364 # subset out the physical_filter dimension, first with duplicates: 

1365 expectedAllBiases = [ 

1366 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1367 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"), 

1368 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"), 

1369 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1370 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1371 ] 

1372 # - ...and without duplicates: 

1373 expectedDeduplicatedBiases = [ 

1374 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"), 

1375 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"), 

1376 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"), 

1377 ] 

1378 # Test against those expected results, using a "lazy" query for the 

1379 # data IDs (which re-executes that query each time we use it to do 

1380 # something new). 

1381 dataIds = registry.queryDataIds( 

1382 ["detector", "physical_filter"], 

1383 where="detector.purpose = 'SCIENCE'", # this rejects detector=4 

1384 instrument="Cam1", 

1385 ) 

1386 self.assertEqual(dataIds.graph, expectedGraph) 

1387 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1388 self.assertCountEqual( 

1389 list( 

1390 dataIds.findDatasets( 

1391 flat, 

1392 collections=["imported_r"], 

1393 ) 

1394 ), 

1395 expectedFlats, 

1396 ) 

1397 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1398 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1399 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1400 self.assertCountEqual( 

1401 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)), 

1402 expectedAllBiases, 

1403 ) 

1404 self.assertCountEqual( 

1405 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)), 

1406 expectedDeduplicatedBiases, 

1407 ) 

1408 # Materialize the bias dataset queries (only) by putting the results 

1409 # into temporary tables, then repeat those tests. 

1410 with subsetDataIds.findDatasets( 

1411 bias, collections=["imported_r", "imported_g"], findFirst=False 

1412 ).materialize() as biases: 

1413 self.assertCountEqual(list(biases), expectedAllBiases) 

1414 with subsetDataIds.findDatasets( 

1415 bias, collections=["imported_r", "imported_g"], findFirst=True 

1416 ).materialize() as biases: 

1417 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1418 # Materialize the data ID subset query, but not the dataset queries. 

1419 with subsetDataIds.materialize() as subsetDataIds: 

1420 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1421 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1422 self.assertCountEqual( 

1423 list( 

1424 subsetDataIds.findDatasets( 

1425 bias, collections=["imported_r", "imported_g"], findFirst=False 

1426 ) 

1427 ), 

1428 expectedAllBiases, 

1429 ) 

1430 self.assertCountEqual( 

1431 list( 

1432 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1433 ), 

1434 expectedDeduplicatedBiases, 

1435 ) 

1436 # Materialize the dataset queries, too. 

1437 with subsetDataIds.findDatasets( 

1438 bias, collections=["imported_r", "imported_g"], findFirst=False 

1439 ).materialize() as biases: 

1440 self.assertCountEqual(list(biases), expectedAllBiases) 

1441 with subsetDataIds.findDatasets( 

1442 bias, collections=["imported_r", "imported_g"], findFirst=True 

1443 ).materialize() as biases: 

1444 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1445 # Materialize the original query, but none of the follow-up queries. 

1446 with dataIds.materialize() as dataIds: 

1447 self.assertEqual(dataIds.graph, expectedGraph) 

1448 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1449 self.assertCountEqual( 

1450 list( 

1451 dataIds.findDatasets( 

1452 flat, 

1453 collections=["imported_r"], 

1454 ) 

1455 ), 

1456 expectedFlats, 

1457 ) 

1458 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1459 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1460 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1461 self.assertCountEqual( 

1462 list( 

1463 subsetDataIds.findDatasets( 

1464 bias, collections=["imported_r", "imported_g"], findFirst=False 

1465 ) 

1466 ), 

1467 expectedAllBiases, 

1468 ) 

1469 self.assertCountEqual( 

1470 list( 

1471 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True) 

1472 ), 

1473 expectedDeduplicatedBiases, 

1474 ) 

1475 # Materialize just the bias dataset queries. 

1476 with subsetDataIds.findDatasets( 

1477 bias, collections=["imported_r", "imported_g"], findFirst=False 

1478 ).materialize() as biases: 

1479 self.assertCountEqual(list(biases), expectedAllBiases) 

1480 with subsetDataIds.findDatasets( 

1481 bias, collections=["imported_r", "imported_g"], findFirst=True 

1482 ).materialize() as biases: 

1483 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1484 # Materialize the subset data ID query, but not the dataset 

1485 # queries. 

1486 with subsetDataIds.materialize() as subsetDataIds: 

1487 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1488 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1489 self.assertCountEqual( 

1490 list( 

1491 subsetDataIds.findDatasets( 

1492 bias, collections=["imported_r", "imported_g"], findFirst=False 

1493 ) 

1494 ), 

1495 expectedAllBiases, 

1496 ) 

1497 self.assertCountEqual( 

1498 list( 

1499 subsetDataIds.findDatasets( 

1500 bias, collections=["imported_r", "imported_g"], findFirst=True 

1501 ) 

1502 ), 

1503 expectedDeduplicatedBiases, 

1504 ) 

1505 # Materialize the bias dataset queries, too, so now we're 

1506 # materializing every single step. 

1507 with subsetDataIds.findDatasets( 

1508 bias, collections=["imported_r", "imported_g"], findFirst=False 

1509 ).materialize() as biases: 

1510 self.assertCountEqual(list(biases), expectedAllBiases) 

1511 with subsetDataIds.findDatasets( 

1512 bias, collections=["imported_r", "imported_g"], findFirst=True 

1513 ).materialize() as biases: 

1514 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1515 

1516 def testEmptyDimensionsQueries(self): 

1517 """Test Query and QueryResults objects in the case where there are no 

1518 dimensions. 

1519 """ 

1520 # Set up test data: one dataset type, two runs, one dataset in each. 

1521 registry = self.makeRegistry() 

1522 self.loadData(registry, "base.yaml") 

1523 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog") 

1524 registry.registerDatasetType(schema) 

1525 dataId = DataCoordinate.makeEmpty(registry.dimensions) 

1526 run1 = "run1" 

1527 run2 = "run2" 

1528 registry.registerRun(run1) 

1529 registry.registerRun(run2) 

1530 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1) 

1531 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2) 

1532 # Query directly for both of the datasets, then for each one individually. 

1533 self.checkQueryResults( 

1534 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2] 

1535 ) 

1536 self.checkQueryResults( 

1537 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True), 

1538 [dataset1], 

1539 ) 

1540 self.checkQueryResults( 

1541 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True), 

1542 [dataset2], 

1543 ) 

1544 # Query for data IDs with no dimensions. 

1545 dataIds = registry.queryDataIds([]) 

1546 self.checkQueryResults(dataIds, [dataId]) 

1547 # Use queried data IDs to find the datasets. 

1548 self.checkQueryResults( 

1549 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1550 [dataset1, dataset2], 

1551 ) 

1552 self.checkQueryResults( 

1553 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1554 [dataset1], 

1555 ) 

1556 self.checkQueryResults( 

1557 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1558 [dataset2], 

1559 ) 

1560 # Now materialize the data ID query results and repeat those tests. 

1561 with dataIds.materialize() as dataIds: 

1562 self.checkQueryResults(dataIds, [dataId]) 

1563 self.checkQueryResults( 

1564 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1565 [dataset1], 

1566 ) 

1567 self.checkQueryResults( 

1568 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1569 [dataset2], 

1570 ) 

1571 # Query for non-empty data IDs, then subset that to get the empty one. 

1572 # Repeat the above tests starting from that. 

1573 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True) 

1574 self.checkQueryResults(dataIds, [dataId]) 

1575 self.checkQueryResults( 

1576 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1577 [dataset1, dataset2], 

1578 ) 

1579 self.checkQueryResults( 

1580 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1581 [dataset1], 

1582 ) 

1583 self.checkQueryResults( 

1584 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1585 [dataset2], 

1586 ) 

1587 with dataIds.materialize() as dataIds: 

1588 self.checkQueryResults(dataIds, [dataId]) 

1589 self.checkQueryResults( 

1590 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1591 [dataset1, dataset2], 

1592 ) 

1593 self.checkQueryResults( 

1594 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1595 [dataset1], 

1596 ) 

1597 self.checkQueryResults( 

1598 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1599 [dataset2], 

1600 ) 

1601 # Query for non-empty data IDs, then materialize, then subset to get 

1602 # the empty one. Repeat again. 

1603 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds: 

1604 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True) 

1605 self.checkQueryResults(dataIds, [dataId]) 

1606 self.checkQueryResults( 

1607 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1608 [dataset1, dataset2], 

1609 ) 

1610 self.checkQueryResults( 

1611 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1612 [dataset1], 

1613 ) 

1614 self.checkQueryResults( 

1615 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1616 [dataset2], 

1617 ) 

1618 with dataIds.materialize() as dataIds: 

1619 self.checkQueryResults(dataIds, [dataId]) 

1620 self.checkQueryResults( 

1621 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), 

1622 [dataset1, dataset2], 

1623 ) 

1624 self.checkQueryResults( 

1625 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), 

1626 [dataset1], 

1627 ) 

1628 self.checkQueryResults( 

1629 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), 

1630 [dataset2], 

1631 ) 

1632 

1633 def testDimensionDataModifications(self): 

1634 """Test that modifying dimension records via 

1635 syncDimensionData(..., update=True) and 

1636 insertDimensionData(..., replace=True) works as expected, even in the 

1637 presence of datasets using those dimensions and spatial overlap 

1638 relationships. 

1639 """ 

1640 

1641 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]: 

1642 """Unpack a sphgeom.RangeSet into the integers it contains.""" 

1643 for begin, end in ranges: 

1644 yield from range(begin, end) 

1645 
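# Worked example of unpack_range_set (illustrative, using the index
# chosen below): lsst.sphgeom.RangeSet(12288) represents the half-open
# range [12288, 12289), and .scaled(4) maps it to [49152, 49156), so
# unpack_range_set would yield the four child-trixel indices
# 49152, 49153, 49154, and 49155.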

1646 def range_set_hull( 

1647 ranges: lsst.sphgeom.RangeSet, 

1648 pixelization: lsst.sphgeom.HtmPixelization, 

1649 ) -> lsst.sphgeom.ConvexPolygon: 

1650 """Create a ConvexPolygon hull of the region defined by a set of 

1651 HTM pixelization index ranges. 

1652 """ 

1653 points = [] 

1654 for index in unpack_range_set(ranges): 

1655 points.extend(pixelization.triangle(index).getVertices()) 

1656 return lsst.sphgeom.ConvexPolygon(points) 

1657 

1658 # Use HTM to set up an initial parent region (one arbitrary trixel) 

1659 # and four child regions (the trixels within the parent at the next 

1660 # level). We'll use the parent as a tract/visit region and the children 

1661 # as its patch/visit_detector regions. 

1662 registry = self.makeRegistry() 

1663 htm6 = registry.dimensions.skypix["htm"][6].pixelization 

1664 commonSkyPix = registry.dimensions.commonSkyPix.pixelization 

1665 index = 12288 

1666 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4) 

1667 assert htm6.universe().contains(child_ranges_small) 

1668 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)] 

1669 parent_region_small = lsst.sphgeom.ConvexPolygon( 

1670 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small)) 

1671 ) 

1672 assert all(parent_region_small.contains(c) for c in child_regions_small) 

1673 # Make a larger version of each child region, defined to be the set of 

1674 # htm6 trixels that overlap the original's bounding circle. Make a new 

1675 # parent that's the convex hull of the new children. 

1676 child_regions_large = [ 

1677 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small 

1678 ] 

1679 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small)) 

1680 parent_region_large = lsst.sphgeom.ConvexPolygon( 

1681 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large)) 

1682 ) 

1683 assert all(parent_region_large.contains(c) for c in child_regions_large) 

1684 assert parent_region_large.contains(parent_region_small) 

1685 assert not parent_region_small.contains(parent_region_large) 

1686 assert not all(parent_region_small.contains(c) for c in child_regions_large) 

1687 # Find some commonSkyPix indices that overlap the large regions but do 

1688 # not overlap the small regions. We use commonSkyPix here to make sure the 

1689 # real tests later involve what's in the database, not just post-query 

1690 # region filtering. 

1691 child_difference_indices = [] 

1692 for large, small in zip(child_regions_large, child_regions_small): 

1693 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small))) 

1694 assert difference, "if this is empty, we can't test anything useful with these regions" 

1695 assert all( 

1696 not commonSkyPix.triangle(d).isDisjointFrom(large) 

1697 and commonSkyPix.triangle(d).isDisjointFrom(small) 

1698 for d in difference 

1699 ) 

1700 child_difference_indices.append(difference) 

1701 parent_difference_indices = list( 

1702 unpack_range_set( 

1703 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small) 

1704 ) 

1705 ) 

1706 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions" 

1707 assert all( 

1708 ( 

1709 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large) 

1710 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small) 

1711 ) 

1712 for d in parent_difference_indices 

1713 ) 

1714 # Now that we've finally got those regions, we'll insert the large ones 

1715 # as tract/patch dimension records. 

1716 skymap_name = "testing_v1" 

1717 registry.insertDimensionData( 

1718 "skymap", 

1719 { 

1720 "name": skymap_name, 

1721 "hash": bytes([42]), 

1722 "tract_max": 1, 

1723 "patch_nx_max": 2, 

1724 "patch_ny_max": 2, 

1725 }, 

1726 ) 

1727 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large}) 

1728 registry.insertDimensionData( 

1729 "patch", 

1730 *[ 

1731 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1732 for n, c in enumerate(child_regions_large) 

1733 ], 

1734 ) 

1735 # Add a dataset that uses these dimensions to make sure that modifying 

1736 # them doesn't disrupt foreign keys (we need to make sure the DB doesn't 

1737 # implement insert with replace=True as delete-then-insert). 

1738 dataset_type = DatasetType( 

1739 "coadd", 

1740 dimensions=["tract", "patch"], 

1741 universe=registry.dimensions, 

1742 storageClass="Exposure", 

1743 ) 

1744 registry.registerDatasetType(dataset_type) 

1745 registry.registerCollection("the_run", CollectionType.RUN) 

1746 registry.insertDatasets( 

1747 dataset_type, 

1748 [{"skymap": skymap_name, "tract": 0, "patch": 2}], 

1749 run="the_run", 

1750 ) 

1751 # Query for tracts and patches that overlap some "difference" 

1752 # commonSkyPix pixels; there should be overlaps, because the database 

1753 # has the "large" suite of regions. 

1754 self.assertEqual( 

1755 {0}, 

1756 { 

1757 data_id["tract"] 

1758 for data_id in registry.queryDataIds( 

1759 ["tract"], 

1760 skymap=skymap_name, 

1761 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1762 ) 

1763 }, 

1764 ) 

1765 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1766 self.assertIn( 

1767 patch_id, 

1768 { 

1769 data_id["patch"] 

1770 for data_id in registry.queryDataIds( 

1771 ["patch"], 

1772 skymap=skymap_name, 

1773 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1774 ) 

1775 }, 

1776 ) 

1777 # Use sync to update the tract region and insert to update the patch 

1778 # regions, to the "small" suite. 

1779 updated = registry.syncDimensionData( 

1780 "tract", 

1781 {"skymap": skymap_name, "id": 0, "region": parent_region_small}, 

1782 update=True, 

1783 ) 

1784 self.assertEqual(updated, {"region": parent_region_large}) 

1785 registry.insertDimensionData( 

1786 "patch", 

1787 *[ 

1788 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1789 for n, c in enumerate(child_regions_small) 

1790 ], 

1791 replace=True, 

1792 ) 

1793 # Query again; there now should be no such overlaps, because the 

1794 # database has the "small" suite of regions. 

1795 self.assertFalse( 

1796 set( 

1797 registry.queryDataIds( 

1798 ["tract"], 

1799 skymap=skymap_name, 

1800 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1801 ) 

1802 ) 

1803 ) 

1804 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1805 self.assertNotIn( 

1806 patch_id, 

1807 { 

1808 data_id["patch"] 

1809 for data_id in registry.queryDataIds( 

1810 ["patch"], 

1811 skymap=skymap_name, 

1812 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1813 ) 

1814 }, 

1815 ) 

1816 # Update back to the large regions and query one more time. 

1817 updated = registry.syncDimensionData( 

1818 "tract", 

1819 {"skymap": skymap_name, "id": 0, "region": parent_region_large}, 

1820 update=True, 

1821 ) 

1822 self.assertEqual(updated, {"region": parent_region_small}) 

1823 registry.insertDimensionData( 

1824 "patch", 

1825 *[ 

1826 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c} 

1827 for n, c in enumerate(child_regions_large) 

1828 ], 

1829 replace=True, 

1830 ) 

1831 self.assertEqual( 

1832 {0}, 

1833 { 

1834 data_id["tract"] 

1835 for data_id in registry.queryDataIds( 

1836 ["tract"], 

1837 skymap=skymap_name, 

1838 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]}, 

1839 ) 

1840 }, 

1841 ) 

1842 for patch_id, patch_difference_indices in enumerate(child_difference_indices): 

1843 self.assertIn( 

1844 patch_id, 

1845 { 

1846 data_id["patch"] 

1847 for data_id in registry.queryDataIds( 

1848 ["patch"], 

1849 skymap=skymap_name, 

1850 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]}, 

1851 ) 

1852 }, 

1853 ) 

1854 

1855 def testCalibrationCollections(self): 

1856 """Test operations on `~CollectionType.CALIBRATION` collections, 

1857 including `Registry.certify`, `Registry.decertify`, and 

1858 `Registry.findDataset`. 

1859 """ 

1860 # Setup - make a Registry, fill it with some datasets in 

1861 # non-calibration collections. 

1862 registry = self.makeRegistry() 

1863 self.loadData(registry, "base.yaml") 

1864 self.loadData(registry, "datasets.yaml") 

1865 # Set up some timestamps. 

1866 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai") 

1867 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai") 

1868 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai") 

1869 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai") 

1870 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai") 

1871 allTimespans = [ 

1872 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2) 

1873 ] 
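# Note the None sentinel at both ends of the input list: combinations()
# pairs elements by position, so this produces half-unbounded spans such
# as Timespan(None, t1) and Timespan(t5, None), plus the fully unbounded
# Timespan(None, None), alongside every bounded [ta, tb) pair.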

1874 # Get references to some datasets. 

1875 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g") 

1876 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g") 

1877 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r") 

1878 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r") 

1879 # Register the main calibration collection we'll be working with. 

1880 collection = "Cam1/calibs/default" 

1881 registry.registerCollection(collection, type=CollectionType.CALIBRATION) 

1882 # Cannot associate into a calibration collection (no timespan). 

1883 with self.assertRaises(TypeError): 

1884 registry.associate(collection, [bias2a]) 

1885 # Certify 2a dataset with [t2, t4) validity. 

1886 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4)) 

1887 # We should not be able to certify 2b with anything overlapping that 

1888 # window. 

1889 with self.assertRaises(ConflictingDefinitionError): 

1890 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3)) 

1891 with self.assertRaises(ConflictingDefinitionError): 

1892 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5)) 

1893 with self.assertRaises(ConflictingDefinitionError): 

1894 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3)) 

1895 with self.assertRaises(ConflictingDefinitionError): 

1896 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5)) 

1897 with self.assertRaises(ConflictingDefinitionError): 

1898 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None)) 

1899 with self.assertRaises(ConflictingDefinitionError): 

1900 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3)) 

1901 with self.assertRaises(ConflictingDefinitionError): 

1902 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5)) 

1903 with self.assertRaises(ConflictingDefinitionError): 

1904 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None)) 

1905 # We should be able to certify 3a with a range overlapping that window, 

1906 # because it's for a different detector. 

1907 # We'll certify 3a over [t1, t3). 

1908 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3)) 

1909 # Now we'll certify 2b and 3b together over [t4, ∞). 

1910 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None)) 

1911 

1912 # Fetch all associations and check that they are what we expect. 

1913 self.assertCountEqual( 

1914 list( 

1915 registry.queryDatasetAssociations( 

1916 "bias", 

1917 collections=[collection, "imported_g", "imported_r"], 

1918 ) 

1919 ), 

1920 [ 

1921 DatasetAssociation( 

1922 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"), 

1923 collection="imported_g", 

1924 timespan=None, 

1925 ), 

1926 DatasetAssociation( 

1927 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"), 

1928 collection="imported_r", 

1929 timespan=None, 

1930 ), 

1931 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None), 

1932 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None), 

1933 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None), 

1934 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None), 

1935 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)), 

1936 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)), 

1937 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

1938 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)), 

1939 ], 

1940 ) 

1941 

1942 class Ambiguous: 

1943 """Tag class to denote lookups that should be ambiguous.""" 

1944 

1945 pass 

1946 

1947 def assertLookup( 

1948 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]] 

1949 ) -> None: 

1950 """Local function that asserts that a bias lookup returns the given 

1951 expected result. 

1952 """ 

1953 if expected is Ambiguous: 

1954 with self.assertRaises(RuntimeError): 

1955 registry.findDataset( 

1956 "bias", 

1957 collections=collection, 

1958 instrument="Cam1", 

1959 detector=detector, 

1960 timespan=timespan, 

1961 ) 

1962 else: 

1963 self.assertEqual( 

1964 expected, 

1965 registry.findDataset( 

1966 "bias", 

1967 collections=collection, 

1968 instrument="Cam1", 

1969 detector=detector, 

1970 timespan=timespan, 

1971 ), 

1972 ) 

1973 

1974 # Systematically test lookups against expected results. 

1975 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

1976 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

1977 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

1978 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

1979 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous) 

1980 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

1981 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

1982 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

1983 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

1984 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous) 

1985 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

1986 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

1987 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

1988 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous) 

1989 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

1990 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a) 

1991 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous) 

1992 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous) 

1993 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b) 

1994 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

1995 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

1996 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

1997 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

1998 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

1999 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2000 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous) 

2001 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2002 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2003 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2004 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2005 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous) 

2006 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2007 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2008 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2009 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous) 

2010 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2011 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2012 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b) 

2013 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2014 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b) 

2015 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2016 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2017 

2018 # Decertify [t3, t5) for all data IDs, then run the test lookups again. 

2019 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at 

2020 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞). 

2021 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5)) 

2022 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None) 

2023 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None) 

2024 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a) 

2025 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a) 

2026 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a) 

2027 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous) 

2028 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None) 

2029 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a) 

2030 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a) 

2031 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a) 

2032 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous) 

2033 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a) 

2034 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a) 

2035 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a) 

2036 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous) 

2037 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None) 

2038 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None) 

2039 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b) 

2040 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None) 

2041 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b) 

2042 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b) 

2043 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None) 

2044 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a) 

2045 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a) 

2046 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a) 

2047 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a) 

2048 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous) 

2049 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a) 

2050 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a) 

2051 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a) 

2052 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a) 

2053 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous) 

2054 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a) 

2055 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a) 

2056 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a) 

2057 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous) 

2058 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None) 

2059 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None) 

2060 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b) 

2061 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None) 

2062 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b) 

2063 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b) 

2064 

2065 # Decertify everything, this time with explicit data IDs, then check 

2066 # that no lookups succeed. 

2067 registry.decertify( 

2068 collection, 

2069 "bias", 

2070 Timespan(None, None), 

2071 dataIds=[ 

2072 dict(instrument="Cam1", detector=2), 

2073 dict(instrument="Cam1", detector=3), 

2074 ], 

2075 ) 

2076 for detector in (2, 3): 

2077 for timespan in allTimespans: 

2078 assertLookup(detector=detector, timespan=timespan, expected=None) 

2079 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return 

2080 # those. 

2081 registry.certify( 

2082 collection, 

2083 [bias2a, bias3a], 

2084 Timespan(None, None), 

2085 ) 

2086 for timespan in allTimespans: 

2087 assertLookup(detector=2, timespan=timespan, expected=bias2a) 

2088 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2089 # Decertify just bias2a over [t2, t4). 

2090 # This should split a single certification row into two (and leave the 

2091 # other existing row, for bias3a, alone). 

2092 registry.decertify( 

2093 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)] 

2094 ) 

2095 for timespan in allTimespans: 

2096 assertLookup(detector=3, timespan=timespan, expected=bias3a) 

2097 overlapsBefore = timespan.overlaps(Timespan(None, t2)) 

2098 overlapsAfter = timespan.overlaps(Timespan(t4, None)) 

2099 if overlapsBefore and overlapsAfter: 

2100 expected = Ambiguous 

2101 elif overlapsBefore or overlapsAfter: 

2102 expected = bias2a 

2103 else: 

2104 expected = None 

2105 assertLookup(detector=2, timespan=timespan, expected=expected) 
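# Worked example of the three branches above: after this decertify,
# bias2a's validity is split into (-inf, t2) and [t4, inf).
# Timespan(t1, t5) overlaps both pieces, so the lookup is Ambiguous;
# Timespan(t1, t2) overlaps only the first piece and finds bias2a;
# Timespan(t2, t4) overlaps neither (bounds are half-open) and finds
# nothing.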

2106 

2107 def testSkipCalibs(self): 

2108 """Test how queries handle skipping of calibration collections.""" 

2109 registry = self.makeRegistry() 

2110 self.loadData(registry, "base.yaml") 

2111 self.loadData(registry, "datasets.yaml") 

2112 

2113 coll_calib = "Cam1/calibs/default" 

2114 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION) 

2115 

2116 # Add all biases to the calibration collection. 

2117 # Without this, the logic that prunes dataset subqueries based on 

2118 # datasetType-collection summary information will fire before the logic 

2119 # we want to test below. This is a good thing (it avoids the dreaded 

2120 # NotImplementedError a bit more often) everywhere but here. 

2121 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None)) 

2122 

2123 coll_list = [coll_calib, "imported_g", "imported_r"] 

2124 chain = "Cam1/chain" 

2125 registry.registerCollection(chain, type=CollectionType.CHAINED) 

2126 registry.setCollectionChain(chain, coll_list) 

2127 

2128 # explicit list will raise if findFirst=True or there are temporal 

2129 # dimensions 

2130 with self.assertRaises(NotImplementedError): 

2131 registry.queryDatasets("bias", collections=coll_list, findFirst=True) 

2132 with self.assertRaises(NotImplementedError): 

2133 registry.queryDataIds( 

2134 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list 

2135 ).count() 

2136 

2137 # chain will skip 

2138 datasets = list(registry.queryDatasets("bias", collections=chain)) 

2139 self.assertGreater(len(datasets), 0) 

2140 

2141 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain)) 

2142 self.assertGreater(len(dataIds), 0) 

2143 

2144 # glob will skip too 

2145 datasets = list(registry.queryDatasets("bias", collections="*d*")) 

2146 self.assertGreater(len(datasets), 0) 

2147 

2148 # regular expression will skip too 

2149 pattern = re.compile(".*") 

2150 datasets = list(registry.queryDatasets("bias", collections=pattern)) 

2151 self.assertGreater(len(datasets), 0) 

2152 

2153 # ellipsis should work as usual 

2154 datasets = list(registry.queryDatasets("bias", collections=...)) 

2155 self.assertGreater(len(datasets), 0) 

2156 

2157 # few tests with findFirst 

2158 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True)) 

2159 self.assertGreater(len(datasets), 0) 

2160 

2161 def testIngestTimeQuery(self): 

2162 """Test that `where` expressions can filter on the dataset ingest_date.""" 

2163 registry = self.makeRegistry() 

2164 self.loadData(registry, "base.yaml") 

2165 dt0 = datetime.utcnow() 

2166 self.loadData(registry, "datasets.yaml") 

2167 dt1 = datetime.utcnow() 

2168 

2169 datasets = list(registry.queryDatasets(..., collections=...)) 

2170 len0 = len(datasets) 

2171 self.assertGreater(len0, 0) 

2172 

2173 where = "ingest_date > T'2000-01-01'" 

2174 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2175 len1 = len(datasets) 

2176 self.assertEqual(len0, len1) 

2177 

2178 # no one will ever use this piece of software in 30 years 

2179 where = "ingest_date > T'2050-01-01'" 

2180 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2181 len2 = len(datasets) 

2182 self.assertEqual(len2, 0) 

2183 

2184 # Check more exact timing to make sure there is no 37 seconds offset 

2185 # (after fixing DM-30124). SQLite time precision is 1 second, so make 

2186 # sure that we don't test with higher precision. 

2187 tests = [ 

2188 # format: (timestamp, operator, expected_len) 

2189 (dt0 - timedelta(seconds=1), ">", len0), 

2190 (dt0 - timedelta(seconds=1), "<", 0), 

2191 (dt1 + timedelta(seconds=1), "<", len0), 

2192 (dt1 + timedelta(seconds=1), ">", 0), 

2193 ] 

2194 for dt, op, expect_len in tests: 

2195 dt_str = dt.isoformat(sep=" ") 

2196 

2197 where = f"ingest_date {op} T'{dt_str}'" 

2198 datasets = list(registry.queryDatasets(..., collections=..., where=where)) 

2199 self.assertEqual(len(datasets), expect_len) 

2200 

2201 # same with bind using datetime or astropy Time 

2202 where = f"ingest_date {op} ingest_time" 

2203 datasets = list( 

2204 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt}) 

2205 ) 

2206 self.assertEqual(len(datasets), expect_len) 

2207 

2208 dt_astropy = astropy.time.Time(dt, format="datetime") 

2209 datasets = list( 

2210 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy}) 

2211 ) 

2212 self.assertEqual(len(datasets), expect_len) 

2213 

2214 def testTimespanQueries(self): 

2215 """Test query expressions involving timespans.""" 

2216 registry = self.makeRegistry() 

2217 self.loadData(registry, "hsc-rc2-subset.yaml") 

2218 # All visits in the database; mapping from ID to timespan. 

2219 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")} 

2220 # Just those IDs, sorted (which is also temporal sorting, because HSC 

2221 # visit IDs are monotonically increasing). 

2222 ids = sorted(visits.keys()) 

2223 self.assertGreater(len(ids), 20) 

2224 # Pick some quasi-random indexes into `ids` to play with. 

2225 i1 = int(len(ids) * 0.1) 

2226 i2 = int(len(ids) * 0.3) 

2227 i3 = int(len(ids) * 0.6) 

2228 i4 = int(len(ids) * 0.8) 

2229 # Extract some times from those: just before the beginning of i1 (which 

2230 # should be after the end of the previous visit), exactly the 

2231 # beginning of i2, just after the beginning of i3 (and before its end), 

2232 # and the exact end of i4. 

2233 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec") 

2234 self.assertGreater(t1, visits[ids[i1 - 1]].end) 

2235 t2 = visits[ids[i2]].begin 

2236 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec") 

2237 self.assertLess(t3, visits[ids[i3]].end) 

2238 t4 = visits[ids[i4]].end 

2239 # Make sure those are actually in order. 

2240 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1])) 

2241 

2242 bind = { 

2243 "t1": t1, 

2244 "t2": t2, 

2245 "t3": t3, 

2246 "t4": t4, 

2247 "ts23": Timespan(t2, t3), 

2248 } 

2249 

2250 def query(where): 

2251 """Helper function that queries for visit data IDs and returns 

2252 results as a sorted, deduplicated list of visit IDs. 

2253 """ 

2254 return sorted( 

2255 { 

2256 dataId["visit"] 

2257 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where) 

2258 } 

2259 ) 

2260 

2261 # Try a bunch of timespan queries, mixing up the bounds themselves, 

2262 # where they appear in the expression, and how we get the timespan into 

2263 # the expression. 

2264 

2265 # t1 is before the start of i1, so this should not include i1. 

2266 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)")) 

2267 # t2 is exactly at the start of i2, but ends are exclusive, so these 

2268 # should not include i2. 

2269 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan")) 

2270 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)")) 

2271 # t3 is in the middle of i3, so this should include i3. 

2272 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23")) 

2273 # This one should not include i3 by the same reasoning. 

2274 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)")) 

2275 # t4 is exactly at the end of i4, so this should include i4. 

2276 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)")) 

2277 # i4's upper bound of t4 is exclusive so this should not include i4. 

2278 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)")) 

2279 

2280 # Now some timespan vs. time scalar queries. 

2281 self.assertEqual(ids[:i2], query("visit.timespan < t2")) 

2282 self.assertEqual(ids[:i2], query("t2 > visit.timespan")) 

2283 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3")) 

2284 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan")) 

2285 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3")) 

2286 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan")) 

2287 

2288 # Empty timespans should not overlap anything. 

2289 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)")) 
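# A standalone sketch (illustrative only, not executed) of the half-open
# [begin, end) semantics exercised above:
#
#     ts = Timespan(t2, t3)
#     assert ts.overlaps(Timespan(t2, t4))      # both contain [t2, t3)
#     assert not ts.overlaps(Timespan(t3, t4))  # t3 is excluded from ts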

2290 

2291 def testCollectionSummaries(self): 

2292 """Test recording and retrieval of collection summaries.""" 

2293 self.maxDiff = None 

2294 registry = self.makeRegistry() 

2295 # Importing datasets from yaml should go through the code path where 

2296 # we update collection summaries as we insert datasets. 

2297 self.loadData(registry, "base.yaml") 

2298 self.loadData(registry, "datasets.yaml") 

2299 flat = registry.getDatasetType("flat") 

2300 expected1 = CollectionSummary.makeEmpty(registry.dimensions) 

2301 expected1.datasetTypes.add(registry.getDatasetType("bias")) 

2302 expected1.datasetTypes.add(flat) 

2303 expected1.dimensions.update_extract( 

2304 DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions) 

2305 ) 

2306 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2307 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2308 # Create a chained collection with both of the imported runs; the 

2309 # summary should be the same, because it's a union with itself. 

2310 chain = "chain" 

2311 registry.registerCollection(chain, CollectionType.CHAINED) 

2312 registry.setCollectionChain(chain, ["imported_r", "imported_g"]) 

2313 self.assertEqual(registry.getCollectionSummary(chain), expected1) 

2314 # Associate flats only into a tagged collection and a calibration 

2315 # collection to check summaries of those. 

2316 tag = "tag" 

2317 registry.registerCollection(tag, CollectionType.TAGGED) 

2318 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g")) 

2319 calibs = "calibs" 

2320 registry.registerCollection(calibs, CollectionType.CALIBRATION) 

2321 registry.certify( 

2322 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None) 

2323 ) 

2324 expected2 = expected1.copy() 

2325 expected2.datasetTypes.discard("bias") 

2326 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2327 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2328 # Explicitly calling Registry.refresh() should load those same 

2329 # summaries, via a totally different code path. 

2330 registry.refresh() 

2331 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1) 

2332 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1) 

2333 self.assertEqual(registry.getCollectionSummary(tag), expected2) 

2334 self.assertEqual(registry.getCollectionSummary(calibs), expected2) 

2335 

2336 def testUnrelatedDimensionQueries(self): 

2337 """Test that WHERE expressions in queries can reference dimensions that 

2338 are not in the result set. 

2339 """ 

2340 registry = self.makeRegistry() 

2341 # There is no data to back this query, but it should still return 

2342 # zero records instead of raising. 

2343 self.assertFalse( 

2344 set( 

2345 registry.queryDataIds( 

2346 ["visit", "detector"], where="instrument='Cam1' AND skymap='not_here' AND tract=0" 

2347 ) 

2348 ), 

2349 ) 

2350 

2351 def testBindInQueryDatasets(self): 

2352 """Test that the bind parameter is correctly forwarded in 

2353 queryDatasets recursion. 

2354 """ 

2355 registry = self.makeRegistry() 

2356 # Load the standard base and datasets test data so there is something 

2357 # to query against. 

2358 self.loadData(registry, "base.yaml") 

2359 self.loadData(registry, "datasets.yaml") 

2360 self.assertEqual( 

2361 set(registry.queryDatasets("flat", band="r", collections=...)), 

2362 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)), 

2363 ) 
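# Note that bind values are not limited to scalars; testTimespanQueries
# above binds a Timespan instance ("ts23") through the same mechanism.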

2364 

2365 def testQueryResultSummaries(self): 

2366 """Test summary methods like `count`, `any`, and `explain_no_results` 

2367 on `DataCoordinateQueryResults` and `DatasetQueryResults` 

2368 """ 

2369 registry = self.makeRegistry() 

2370 self.loadData(registry, "base.yaml") 

2371 self.loadData(registry, "datasets.yaml") 

2372 self.loadData(registry, "spatial.yaml") 

2373 # Default test dataset has two collections, each with both flats and 

2374 # biases. Add a new collection with only biases. 

2375 registry.registerCollection("biases", CollectionType.TAGGED) 

2376 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"])) 

2377 # First query yields two results, and involves no postprocessing. 

        query1 = registry.queryDataIds(["physical_filter"], band="r")
        self.assertTrue(query1.any(execute=False, exact=False))
        self.assertTrue(query1.any(execute=True, exact=False))
        self.assertTrue(query1.any(execute=True, exact=True))
        self.assertEqual(query1.count(exact=False), 2)
        self.assertEqual(query1.count(exact=True), 2)
        self.assertFalse(list(query1.explain_no_results()))
        # Second query should yield no results, but this isn't detectable
        # unless we actually run a query.
        query2 = registry.queryDataIds(["physical_filter"], band="h")
        self.assertTrue(query2.any(execute=False, exact=False))
        self.assertFalse(query2.any(execute=True, exact=False))
        self.assertFalse(query2.any(execute=True, exact=True))
        self.assertEqual(query2.count(exact=False), 0)
        self.assertEqual(query2.count(exact=True), 0)
        self.assertFalse(list(query2.explain_no_results()))
        # These queries yield no results due to various problems that can be
        # spotted prior to execution, yielding helpful diagnostics.
        for query, snippets in [
            (
                # Dataset type name doesn't match any existing dataset types.
                registry.queryDatasets("nonexistent", collections=...),
                ["nonexistent"],
            ),
            (
                # Dataset type name doesn't match any existing dataset types.
                registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...),
                ["nonexistent"],
            ),
            (
                # Dataset type object isn't registered.
                registry.queryDatasets(
                    DatasetType(
                        "nonexistent",
                        dimensions=["instrument"],
                        universe=registry.dimensions,
                        storageClass="Image",
                    ),
                    collections=...,
                ),
                ["nonexistent"],
            ),
            (
                # No datasets of this type in this collection.
                registry.queryDatasets("flat", collections=["biases"]),
                ["flat", "biases"],
            ),
            (
                # No collections matching at all.
                registry.queryDatasets("flat", collections=re.compile("potato.+")),
                ["potato"],
            ),
        ]:
            self.assertFalse(query.any(execute=False, exact=False))
            self.assertFalse(query.any(execute=True, exact=False))
            self.assertFalse(query.any(execute=True, exact=True))
            self.assertEqual(query.count(exact=False), 0)
            self.assertEqual(query.count(exact=True), 0)
            messages = list(query.explain_no_results())
            self.assertTrue(messages)
            # Want all expected snippets to appear in at least one message.
            self.assertTrue(
                any(all(snippet in message for snippet in snippets) for message in messages),
                messages,
            )


        # These queries yield no results due to problems that can be identified
        # by cheap follow-up queries, yielding helpful diagnostics.
        for query, snippets in [
            (
                # No records for one of the involved dimensions.
                registry.queryDataIds(["subfilter"]),
                ["dimension records", "subfilter"],
            ),
        ]:
            self.assertFalse(query.any(execute=True, exact=False))
            self.assertFalse(query.any(execute=True, exact=True))
            self.assertEqual(query.count(exact=True), 0)
            messages = list(query.explain_no_results())
            self.assertTrue(messages)
            # Want all expected snippets to appear in at least one message.
            self.assertTrue(
                any(all(snippet in message for snippet in snippets) for message in messages),
                messages,
            )


        # This query yields four overlaps in the database, but one is filtered
        # out in postprocessing. The count queries aren't accurate because
        # they don't account for duplication that happens due to an internal
        # join against commonSkyPix.
        query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
        self.assertEqual(
            {
                DataCoordinate.standardize(
                    instrument="Cam1",
                    skymap="SkyMap1",
                    visit=v,
                    tract=t,
                    universe=registry.dimensions,
                )
                for v, t in [(1, 0), (2, 0), (2, 1)]
            },
            set(query3),
        )
        self.assertTrue(query3.any(execute=False, exact=False))
        self.assertTrue(query3.any(execute=True, exact=False))
        self.assertTrue(query3.any(execute=True, exact=True))
        self.assertGreaterEqual(query3.count(exact=False), 4)
        self.assertGreaterEqual(query3.count(exact=True), 3)
        self.assertFalse(list(query3.explain_no_results()))
        # This query yields overlaps in the database, but all are filtered
        # out in postprocessing. The count queries again aren't very useful.
        # We have to use `where=` here to avoid an optimization that
        # (currently) skips the spatial postprocess-filtering because it
        # recognizes that no spatial join is necessary. That's not ideal, but
        # fixing it is out of scope for this ticket.
        query4 = registry.queryDataIds(
            ["visit", "tract"],
            instrument="Cam1",
            skymap="SkyMap1",
            where="visit=1 AND detector=1 AND tract=0 AND patch=4",
        )
        self.assertFalse(set(query4))
        self.assertTrue(query4.any(execute=False, exact=False))
        self.assertTrue(query4.any(execute=True, exact=False))
        self.assertFalse(query4.any(execute=True, exact=True))
        self.assertGreaterEqual(query4.count(exact=False), 1)
        self.assertEqual(query4.count(exact=True), 0)
        messages = list(query4.explain_no_results())
        self.assertTrue(messages)
        self.assertTrue(any("regions did not overlap" in message for message in messages))

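    # Editorial sketch, not part of the original suite: how the summary
    # methods combine for a cheap existence check with diagnostics, assuming
    # `registry` is any `Registry` instance.
    @staticmethod
    def _sketchExplainEmptyQuery(registry):
        # Hypothetical helper added for illustration only.
        query = registry.queryDataIds(["physical_filter"], band="h")
        if not query.any(execute=True, exact=True):
            # Each message describes one reason the query came back empty.
            for message in query.explain_no_results():
                print(message)
        return query.count(exact=True)
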

    def testQueryDataIdsOrderBy(self):
        """Test order_by and limit on result returned by queryDataIds()."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.loadData(registry, "spatial.yaml")

        def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
            return registry.queryDataIds(
                dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
            )

        Test = namedtuple(
            "testQueryDataIdsOrderByTest",
            ("order_by", "keys", "result", "limit", "datasets", "collections"),
            defaults=(None, None, None),
        )
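
        # The order_by strings below exercise the supported grammar: a
        # leading "-" requests descending order, bare names refer to a
        # dimension ("tract") or unambiguous metadata ("exposure_time"), and
        # dotted forms name element metadata ("visit.name", "detector.raft")
        # or timespan bounds ("timespan.begin").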

        test_data = (
            Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
            Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
            Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
            Test(
                "tract.id,visit.id",
                "tract,visit",
                ((0, 1), (0, 1), (0, 2)),
                limit=(3,),
            ),
            Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
            Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
            Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
            Test(
                "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
            ),
            Test(
                "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
            ),
            Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test(
                "tract,-timespan.begin,timespan.end",
                "tract,visit",
                ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
            ),
            Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
            Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
            Test(
                "tract,detector",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
            Test(
                "tract,detector.full_name",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
            Test(
                "tract,detector.raft,detector.name_in_raft",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
        )


        for test in test_data:
            order_by = test.order_by.split(",")
            keys = test.keys.split(",")
            query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
            self.assertEqual(dataIds, test.result)

            # Repeat the same query via a materialized result to ensure
            # ordering and limits survive materialization.
            query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            with query.materialize() as materialized:
                dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in materialized)
                self.assertEqual(dataIds, test.result)


        # Malformed or unresolvable order_by arguments should raise
        # ValueError with a helpful message.
        for order_by in ("", "-"):
            with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
                list(do_query().order_by(order_by))

        for order_by in ("undimension.name", "-undimension.name"):
            with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"):
                list(do_query().order_by(order_by))

        for order_by in ("attract", "-attract"):
            with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
                list(do_query().order_by(order_by))

        with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
            list(do_query(("exposure", "visit")).order_by("exposure_time"))

        with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimension"):
            list(do_query(("exposure", "visit")).order_by("timespan.begin"))

        with self.assertRaisesRegex(
            ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
        ):
            list(do_query(("tract",)).order_by("timespan.begin"))

        with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
            list(do_query(("tract",)).order_by("tract.timespan.begin"))

        with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
            list(do_query(("tract",)).order_by("tract.name"))

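    # Editorial sketch, not part of the original suite: the order_by/limit
    # chaining driven by the table above, assuming `registry` is any
    # `Registry` instance.
    @staticmethod
    def _sketchOrderedDataIds(registry):
        # Hypothetical helper: descending tract, ascending visit, first
        # three rows only.
        query = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
        return [dataId for dataId in query.order_by("-tract", "visit").limit(3)]
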

    def testQueryDimensionRecordsOrderBy(self):
        """Test order_by and limit on result returned by
        queryDimensionRecords().
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.loadData(registry, "spatial.yaml")

        def do_query(element, datasets=None, collections=None):
            return registry.queryDimensionRecords(
                element, instrument="Cam1", datasets=datasets, collections=collections
            )

        query = do_query("detector")
        self.assertEqual(len(list(query)), 4)

        Test = namedtuple(
            "testQueryDimensionRecordsOrderByTest",
            ("element", "order_by", "result", "limit", "datasets", "collections"),
            defaults=(None, None, None),
        )

        test_data = (
            Test("detector", "detector", (1, 2, 3, 4)),
            Test("detector", "-detector", (4, 3, 2, 1)),
            Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
            Test("detector", "-detector.purpose", (4,), limit=(1,)),
            Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
            Test("visit", "visit", (1, 2)),
            Test("visit", "-visit.id", (2, 1)),
            Test("visit", "zenith_angle", (1, 2)),
            Test("visit", "-visit.name", (2, 1)),
            Test("visit", "day_obs,-timespan.begin", (2, 1)),
        )

        for test in test_data:
            order_by = test.order_by.split(",")
            query = do_query(test.element).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            recordIds = tuple(rec.id for rec in query)
            self.assertEqual(recordIds, test.result)

        # Malformed or unresolvable order_by arguments should raise
        # ValueError with a helpful message.
        for order_by in ("", "-"):
            with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
                list(do_query("detector").order_by(order_by))

        for order_by in ("undimension.name", "-undimension.name"):
            with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
                list(do_query("detector").order_by(order_by))

        for order_by in ("attract", "-attract"):
            with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
                list(do_query("detector").order_by(order_by))

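    # Editorial sketch, not part of the original suite: the same order_by
    # grammar applies to dimension records, sorting here by the raft and
    # in-raft detector names exercised above.
    @staticmethod
    def _sketchOrderedDetectorRecords(registry):
        # Hypothetical helper added for illustration only.
        records = registry.queryDimensionRecords("detector", instrument="Cam1")
        return [rec.id for rec in records.order_by("raft", "-name_in_raft")]
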

    def testDatasetConstrainedDimensionRecordQueries(self):
        """Test that queryDimensionRecords works even when given a dataset
        constraint whose dimensions extend beyond the requested dimension
        element's.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Query for physical_filter dimension records, using a dataset type
        # ("flat") whose dimensions include detector as well as
        # physical_filter.
        records = registry.queryDimensionRecords(
            "physical_filter",
            datasets=["flat"],
            collections="imported_r",
        )
        self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
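
    # Editorial sketch, not part of the original suite: a dataset constraint
    # restricts dimension records to values for which matching datasets
    # exist, even when the dataset type carries extra dimensions.
    @staticmethod
    def _sketchFiltersWithFlats(registry):
        # Hypothetical helper added for illustration only.
        records = registry.queryDimensionRecords(
            "physical_filter", datasets=["flat"], collections="imported_r"
        )
        return {record.name for record in records}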