Coverage for python / lsst / daf / butler / registry / tests / _registry.py: 6%

1699 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-28 08:36 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29from ... import ddl 

30 

31__all__ = ["RegistryTests"] 

32 

33import contextlib 

34import datetime 

35import itertools 

36import re 

37import time 

38import unittest 

39import uuid 

40from abc import ABC, abstractmethod 

41from collections import defaultdict, namedtuple 

42from collections.abc import Callable, Iterator 

43from concurrent.futures import ThreadPoolExecutor 

44from contextlib import ExitStack 

45from datetime import timedelta 

46from threading import Barrier 

47from typing import TypeVar 

48 

49import astropy.time 

50import sqlalchemy 

51 

52try: 

53 import numpy as np 

54except ImportError: 

55 np = None 

56 

57import lsst.sphgeom 

58 

59from ... import Butler 

60from ..._collection_type import CollectionType 

61from ..._dataset_association import DatasetAssociation 

62from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef 

63from ..._dataset_type import DatasetType 

64from ..._exceptions import ( 

65 CalibrationLookupError, 

66 CollectionTypeError, 

67 DataIdValueError, 

68 InconsistentDataIdError, 

69 InvalidQueryError, 

70 MissingCollectionError, 

71 MissingDatasetTypeError, 

72) 

73from ..._exceptions_legacy import DatasetTypeError 

74from ..._storage_class import StorageClass 

75from ..._timespan import Timespan 

76from ...dimensions import DataCoordinate, DataCoordinateSet, DimensionUniverse, SkyPixDimension 

77from ...direct_butler import DirectButler 

78from .._collection_summary import CollectionSummary 

79from .._config import RegistryConfig 

80from .._exceptions import ( 

81 ArgumentError, 

82 CollectionError, 

83 ConflictingDefinitionError, 

84 DatasetTypeExpressionError, 

85 NoDefaultCollectionError, 

86 OrphanedRecordError, 

87) 

88from ..interfaces import ButlerAttributeExistsError, ReadOnlyDatabaseError 

89from ..queries import ParentDatasetQueryResults 

90from ..sql_registry import SqlRegistry 

91 

92_T = TypeVar("_T") 

93 

94 

95class RegistryTests(ABC): 

96 """Generic tests for the `SqlRegistry` class that can be subclassed to 

97 generate tests for different configurations. 

98 """ 

99 

100 collectionsManager: str | None = None 

101 """Name of the collections manager class, if subclass provides value for 

102 this member then it overrides name specified in default configuration 

103 (`str`). 

104 """ 

105 

106 datasetsManager: str | dict[str, str] | None = None 

107 """Name or configuration dictionary of the datasets manager class, if 

108 subclass provides value for this member then it overrides name specified 

109 in default configuration (`str` or `dict`). 

110 """ 

111 

112 supportsCollectionRegex: bool = False 

113 """True if the registry class being tested supports regex searches for 

114 collections.""" 

115 

def makeRegistryConfig(self) -> RegistryConfig:
    """Build the `RegistryConfig` used to create a registry.

    Subclasses are expected to call this from `makeRegistry`. The
    returned configuration starts from the defaults and then applies the
    manager overrides declared on the class (``collectionsManager`` and
    ``datasetsManager``) when they are set. Subclasses that want a purely
    default configuration should instantiate `RegistryConfig` directly.

    Returns
    -------
    config : `RegistryConfig`
        Default configuration with class-level manager overrides applied.
    """
    registry_config = RegistryConfig()
    # Only truthy overrides are applied; `None` (or empty) keeps the default.
    manager_overrides = (
        ("collections", self.collectionsManager),
        ("datasets", self.datasetsManager),
    )
    for manager_key, manager in manager_overrides:
        if manager:
            registry_config["managers", manager_key] = manager
    return registry_config

131 

@abstractmethod
def make_butler(self, registry_config: RegistryConfig | None = None) -> Butler:
    """Create the butler whose registry is under test.

    Concrete test cases must override this method.

    Parameters
    ----------
    registry_config : `RegistryConfig`, optional
        Registry configuration to use when instantiating the Butler.

    Returns
    -------
    butler : `~lsst.daf.butler.Butler`
        A butler wrapping the registry to be tested.
    """
    raise NotImplementedError

147 

def load_data(self, butler: Butler, *filenames: str) -> None:
    """Import registry test data into a butler.

    Each file is read from
    ``resource://lsst.daf.butler/tests/registry_data/<filename>`` and is
    expected to be a YAML import/export file.

    Parameters
    ----------
    butler : `Butler`
        The butler to load into.
    *filenames : `str`
        Names of the files to load, imported in the order given.
    """
    for filename in filenames:
        resource_uri = f"resource://lsst.daf.butler/tests/registry_data/{filename}"
        # Only registry content is imported; the datastore is not touched.
        butler.import_(filename=resource_uri, without_datastore=True)

164 

def checkQueryResults(self, results, expected):
    """Verify that a query results object yields the expected values.

    Parameters
    ----------
    results : `DataCoordinateQueryResults` or `DatasetQueryResults`
        A lazy-evaluation query results object.
    expected : `list`
        A list of `DataCoordinate` or `DatasetRef` objects that should be
        equal to the results of the query, aside from ordering.
    """
    fetched = list(results)
    self.assertCountEqual(fetched, expected)
    self.assertEqual(results.count(), len(expected))
    # ``any()`` must agree with whether any rows were expected at all.
    has_rows = results.any()
    if not expected:
        self.assertFalse(has_rows)
    else:
        self.assertTrue(has_rows)

182 

def testOpaque(self):
    """Exercise the opaque-table API: `SqlRegistry.registerOpaqueTable`,
    `SqlRegistry.insertOpaqueData`, `SqlRegistry.fetchOpaqueData`, and
    `SqlRegistry.deleteOpaqueData`.
    """
    butler = self.make_butler()
    registry = butler._registry
    table = "opaque_table_for_testing"

    def fetch(**where):
        # Materialize the rows matching the given column constraints.
        return list(registry.fetchOpaqueData(table, **where))

    columns = [
        ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
        ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
        ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
    ]
    registry.registerOpaqueTable(table, spec=ddl.TableSpec(fields=columns))

    records = [
        {"id": 1, "name": "one", "count": None},
        {"id": 2, "name": "two", "count": 5},
        {"id": 3, "name": "three", "count": 6},
    ]
    registry.insertOpaqueData(table, *records)

    # Unconstrained and simple constrained fetches.
    self.assertCountEqual(records, fetch())
    self.assertEqual(records[0:1], fetch(id=1))
    self.assertEqual(records[1:2], fetch(name="two"))
    self.assertEqual(records[0:1], fetch(id=(1, 3), name=("one", "two")))
    self.assertEqual(records, fetch(id=(1, 2, 3)))

    # A very long IN clause that exceeds the sqlite limit on the number of
    # parameters. SQLite documents the limit as 32k but it appears to be
    # much higher in practice.
    self.assertEqual(records, fetch(id=list(range(300_000))))

    # Two IN clauses, each longer than the 1k batch size: the first has
    # duplicates, the second has its matching elements in different
    # batches (after sorting).
    self.assertEqual(
        records[0:2],
        fetch(
            id=list(range(1000)) + list(range(100, 0, -1)),
            name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
        ),
    )

    # Contradictory constraints match nothing.
    self.assertEqual([], fetch(id=1, name="two"))

    # Constrained delete, then delete everything.
    registry.deleteOpaqueData(table, id=3)
    self.assertCountEqual(records[:2], fetch())
    registry.deleteOpaqueData(table)
    self.assertEqual([], fetch())

234 

def testDatasetType(self):
    """Exercise `SqlRegistry.registerDatasetType` and
    `SqlRegistry.getDatasetType`.
    """
    butler = self.make_butler()
    registry = butler.registry

    # A valid first registration.
    first_storage = StorageClass("testDatasetType")
    registry.storageClasses.registerStorageClass(first_storage)
    visit_dimensions = registry.dimensions.conform(("instrument", "visit"))
    patch_dimensions = registry.dimensions.conform(("instrument", "patch"))
    first_type = DatasetType("test", visit_dimensions, first_storage)
    # Registering for the first time reports True.
    self.assertTrue(registry.registerDatasetType(first_type))
    fetched_first = registry.getDatasetType("test")
    self.assertEqual(fetched_first, first_type)

    # Registering the identical definition again is accepted and reports
    # False ...
    self.assertFalse(registry.registerDatasetType(first_type))
    # ... but a different definition under the same name is rejected.
    with self.assertRaises(ConflictingDefinitionError):
        conflicting_type = DatasetType("test", patch_dimensions, first_storage)
        registry.registerDatasetType(conflicting_type)

    # Template can be None
    second_storage = StorageClass("testDatasetType2")
    registry.storageClasses.registerStorageClass(second_storage)
    second_dimensions = registry.dimensions.conform(("instrument", "visit"))
    second_type = DatasetType("testNoneTemplate", second_dimensions, second_storage)
    registry.registerDatasetType(second_type)
    fetched_second = registry.getDatasetType("testNoneTemplate")
    self.assertEqual(fetched_second, second_type)

    registered = set(registry.queryDatasetTypes())
    self.assertEqual(registered, {fetched_first, fetched_second})

    # Basic queryDatasetTypes functionality: glob matching plus reporting
    # of names that matched nothing.
    missing: list[str] = []
    matched = registry.queryDatasetTypes(["te*", "notarealdatasettype"], missing=missing)
    self.assertCountEqual([dt.name for dt in matched], ["test", "testNoneTemplate"])
    self.assertEqual(missing, ["notarealdatasettype"])

    # Registering a dataset type defined in a universe with a different
    # version or namespace must raise.
    foreign_universes = (DimensionUniverse(version=-1), DimensionUniverse(namespace="🔭"))
    for universe in foreign_universes:
        foreign_storage = StorageClass("testDatasetType")
        foreign_type = DatasetType(
            "wrong_universe", ("instrument", "visit"), foreign_storage, universe=universe
        )
        with self.assertRaisesRegex(ValueError, "Incompatible dimension universe versions"):
            registry.registerDatasetType(foreign_type)

289 

def testDatasetTypeCache(self):
    """Check the dataset type cache update logic after a cache miss."""
    writer = self.make_butler()
    reader = writer.clone()
    self.load_data(writer, "base.yaml")

    # Trigger a full cache load in the reader.
    reader.get_dataset_type("flat")
    # Stand-in for an external process registering a dataset type.
    new_type = DatasetType("test_type", ["instrument"], "int", universe=writer.dimensions)
    writer.registry.registerDatasetType(new_type)

    # The first read is a cache miss that triggers the fetch of a single
    # dataset type; the second read should be served from the cache.
    for _ in range(2):
        fetched = reader.get_dataset_type("test_type")
        self.assertEqual(fetched.name, "test_type")
        self.assertEqual(list(fetched.dimensions.names), ["instrument"])

    # A query that goes through the dataset type's tags table.
    found = reader.query_datasets("test_type", collections="*", find_first=False, explain=False)
    self.assertEqual(found, [])

315 

def testDimensions(self):
    """Exercise `SqlRegistry.insertDimensionData`,
    `SqlRegistry.syncDimensionData`, and `SqlRegistry.expandDataId`.
    """
    butler = self.make_butler()
    registry = butler.registry

    # --- instrument -----------------------------------------------------
    instrument = registry.dimensions["instrument"]
    instrument_record = {
        "name": "DummyCam",
        "visit_max": 10,
        "visit_system": 0,
        "exposure_max": 10,
        "detector_max": 2,
        "class_name": "lsst.pipe.base.Instrument",
    }
    registry.insertDimensionData("instrument", instrument_record)
    # A second insert of the same record must fail.
    with self.assertRaises(sqlalchemy.exc.IntegrityError):
        registry.insertDimensionData("instrument", instrument_record)
    # expandDataId returns the record that was just inserted.
    expanded = registry.expandDataId(instrument="DummyCam", dimensions=instrument.minimal_group)
    self.assertEqual(expanded.records["instrument"].toDict(), instrument_record)
    # expandDataId raises when no record has the given ID.
    with self.assertRaises(DataIdValueError):
        registry.expandDataId({"instrument": "Unknown"}, dimensions=instrument.minimal_group)

    # band doesn't have a table; insert should fail.
    with self.assertRaises(TypeError):
        registry.insertDimensionData("band", {"band": "i"})

    # --- physical_filter ------------------------------------------------
    physical_filter = registry.dimensions["physical_filter"]
    filter_record = {"name": "DummyCam_i", "band": "i"}
    # The required dependency ("instrument") is missing, so this fails.
    with self.assertRaises(KeyError):
        registry.insertDimensionData("physical_filter", filter_record)
    # Supplying the required dependency fixes the failure.
    filter_record["instrument"] = "DummyCam"
    registry.insertDimensionData("physical_filter", filter_record)
    expanded = registry.expandDataId(
        instrument="DummyCam", physical_filter="DummyCam_i", dimensions=physical_filter.minimal_group
    )
    self.assertEqual(expanded.records["physical_filter"].toDict(), filter_record)

    # --- detector, via syncDimensionData --------------------------------
    detector_record = {
        "instrument": "DummyCam",
        "id": 1,
        "full_name": "one",
        "name_in_raft": "zero",
        "purpose": "SCIENCE",
    }
    # The first sync inserts a new record.
    self.assertTrue(registry.syncDimensionData("detector", detector_record))
    # Syncing again changes nothing. Note that one field ("raft") is NULL,
    # and that should be okay.
    self.assertFalse(registry.syncDimensionData("detector", detector_record))
    # Same primary key but a different value must be rejected.
    conflicting_record = {**detector_record, "name_in_raft": "four"}
    with self.assertRaises(ConflictingDefinitionError):
        registry.syncDimensionData("detector", conflicting_record)

393 

@unittest.skipIf(np is None, "numpy not available.")
def testNumpyDataId(self):
    """Check that a numpy integer can be used as a data ID value."""
    butler = self.make_butler()
    registry = butler.registry

    # Using an np.int64 for the visit "id" below fails unless
    # Records.fromDict is also patched to look for numbers.Integral.
    visit_record = {
        "instrument": "DummyCam",
        "id": 42,
        "name": "fortytwo",
        "physical_filter": "d-r",
        "day_obs": 20250101,
    }
    records_to_insert = [
        ("instrument", {"instrument": "DummyCam"}),
        ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
        ("day_obs", {"instrument": "DummyCam", "id": 20250101}),
        ("visit", visit_record),
    ]
    for element, record in records_to_insert:
        registry.insertDimensionData(element, record)

    # Try a normal integer and something that looks like an int but is
    # not one.
    candidates = (42, np.int64(42))
    for visit_id in candidates:
        with self.subTest(visit_id=repr(visit_id), id_type=type(visit_id).__name__):
            expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
            self.assertEqual(expanded["visit"], int(visit_id))
            self.assertIsInstance(expanded["visit"], int)

426 

def testDataIdRelationships(self):
    """Check that `SqlRegistry.expandDataId` raises an exception when the
    given keys are inconsistent.
    """
    butler = self.make_butler()
    self.load_data(butler, "base.yaml")
    registry = butler.registry

    # Extra dimension records needed by the check below, listed in the
    # order in which they are inserted.
    extra_records = (
        ("day_obs", {"instrument": "Cam1", "id": 20250101}),
        ("group", {"instrument": "Cam1", "name": "group1"}),
        (
            "exposure",
            {
                "instrument": "Cam1",
                "id": 1,
                "obs_id": "one",
                "physical_filter": "Cam1-G",
                "group": "group1",
                "day_obs": 20250101,
            },
        ),
        ("group", {"instrument": "Cam1", "name": "group2"}),
        (
            "exposure",
            {
                "instrument": "Cam1",
                "id": 2,
                "obs_id": "two",
                "physical_filter": "Cam1-G",
                "group": "group2",
                "day_obs": 20250101,
            },
        ),
        ("visit_system", {"instrument": "Cam1", "id": 0, "name": "one-to-one"}),
        (
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "day_obs": 20250101},
        ),
        ("visit_definition", {"instrument": "Cam1", "visit": 1, "exposure": 1}),
    )
    for element, record in extra_records:
        registry.insertDimensionData(element, record)

    # Visit 1 is defined by exposure 1 only, so pairing it with exposure 2
    # is inconsistent.
    mismatched_id = {"instrument": "Cam1", "visit": 1, "exposure": 2}
    with self.assertRaises(InconsistentDataIdError):
        registry.expandDataId(mismatched_id)

485 

def testDataset(self):
    """Basic checks of `SqlRegistry.insertDatasets`,
    `SqlRegistry.getDataset`, and `SqlRegistry.removeDatasets`.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml")

    # Deliberately non-ASCII run name.
    run_name = "tésτ"
    registry.registerRun(run_name)
    bias_type = registry.getDatasetType("bias")
    data_id = {"instrument": "Cam1", "detector": 2}

    inserted = registry.insertDatasets(bias_type, dataIds=[data_id], run=run_name)
    (ref,) = inserted
    fetched = registry.getDataset(ref.id)
    self.assertIsNotNone(ref.id)
    self.assertEqual(ref, fetched)

    # Inserting the same dataset type / data ID into the same run again
    # is a conflict.
    with self.assertRaises(ConflictingDefinitionError):
        registry.insertDatasets(bias_type, dataIds=[data_id], run=run_name)

    # After removal the dataset can no longer be found in the run.
    registry.removeDatasets([ref])
    remaining = registry.findDataset(bias_type, data_id, collections=[run_name])
    self.assertIsNone(remaining)

505 

def test_get_many_datasets(self):
    """Check `Butler.get_many_datasets` lookups by dataset ID."""
    # Dataset IDs used throughout this test. The last one is not the key
    # of any expected ref below.
    id_a = "60c8a65c-7290-4c38-b1de-e3b1cdcf872d"
    id_b = "d0bb04cd-d697-4a83-ba53-cdfcd58e3a0c"
    id_c = "87f3e68d-258d-41b7-8ea5-edf3557ccb30"
    id_absent = "238c3b83-f6e5-4ccb-a7b0-5028dec1dcbb"

    butler = self.make_butler()
    self.load_data(butler, "base.yaml", "datasets.yaml")
    all_refs = butler.query_all_datasets(["imported_g", "imported_r"], find_first=False)
    expected_refs = {str(ref.id): ref for ref in all_refs}

    def assert_fetches(dataset_ids, expected_ids):
        # Look up ``dataset_ids`` and compare, ignoring order, with the
        # refs known under ``expected_ids``.
        fetched = butler.get_many_datasets(dataset_ids)
        self.assertCountEqual(fetched, [expected_refs[key] for key in expected_ids])

    # Set up a tagged collection containing a dataset used by the checks
    # below. get_many_datasets() queries on tables shared between run
    # collections and tagged collections, so this makes sure the tags
    # don't interfere.
    butler.collections.register("tagged", CollectionType.TAGGED)
    butler.registry.associate("tagged", [expected_refs[id_a]])

    # Empty input returns empty output.
    self.assertEqual(butler.get_many_datasets([]), [])
    # Datasets all of one type, but in different collections.
    assert_fetches([id_a, id_b], [id_a, id_b])
    # Datasets of multiple types with different dimension groups.
    assert_fetches([id_a, id_b, id_c], [id_a, id_b, id_c])
    # Missing datasets are omitted from the result.
    assert_fetches([id_absent, id_a], [id_a])
    # Duplicates are squashed in the result.
    assert_fetches([id_a, id_a], [id_a])
    # UUID instances are accepted as inputs instead of strings.
    assert_fetches([uuid.UUID(id_a)], [id_a])
    # Bad ID format raises ValueError.
    with self.assertRaises(ValueError):
        butler.get_many_datasets(["not-a-valid-uuid"])
    # Works with arbitrary iterables as input.
    assert_fetches(itertools.chain([id_a, id_b]), [id_a, id_b])

592 

def test_fetch_run_dataset_ids(self):
    """Check that `SqlRegistry._fetch_run_dataset_ids` returns the IDs of
    the datasets in a run.
    """
    butler = self.make_butler()
    registry = butler._registry
    self.load_data(butler, "base.yaml", "datasets.yaml")

    run_ids = registry._fetch_run_dataset_ids("imported_r")
    self.assertEqual(len(run_ids), 7)
    # The IDs must match those reported by a regular dataset query.
    queried_ids = [ref.id for ref in butler.query_all_datasets("imported_r")]
    self.assertCountEqual(run_ids, queried_ids)

601 

def testFindDataset(self):
    """Exercise `SqlRegistry.findDataset`."""
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml")
    run = "tésτ"
    bias_type = registry.getDatasetType("bias")
    detector4_id = {"instrument": "Cam1", "detector": 4}
    registry.registerRun(run)

    def find_detector2(dataset_type, **search):
        # Look up ``dataset_type`` for Cam1 / detector 2 with the given
        # search options (``collections`` and optionally ``timespan``).
        return registry.findDataset(dataset_type, instrument="Cam1", detector=2, **search)

    (detector4_ref,) = registry.insertDatasets(bias_type, dataIds=[detector4_id], run=run)
    found = registry.findDataset(bias_type, detector4_id, collections=[run])
    self.assertEqual(found, detector4_ref)

    # Retrieval with an invalid data ID (no detector) raises.
    with self.assertRaises(LookupError):
        registry.findDataset(bias_type, {"instrument": "Cam1"}, collections=run)

    # Different data IDs resolve to different datasets.
    detector1_id = {"instrument": "Cam1", "detector": 1}
    (detector1_ref,) = registry.insertDatasets(bias_type, dataIds=[detector1_id], run=run)
    detector2_id = {"instrument": "Cam1", "detector": 2}
    (detector2_ref,) = registry.insertDatasets(bias_type, dataIds=[detector2_id], run=run)
    pairs = ((detector1_id, detector1_ref, detector2_ref), (detector2_id, detector2_ref, detector1_ref))
    for data_id, own_ref, _ in pairs:
        self.assertEqual(registry.findDataset(bias_type, data_id, collections=run), own_ref)
    for data_id, _, other_ref in pairs:
        self.assertNotEqual(registry.findDataset(bias_type, data_id, collections=run), other_ref)

    # A data ID with no dataset yields None.
    absent_id = {"instrument": "Cam1", "detector": 3}
    self.assertIsNone(registry.findDataset(bias_type, absent_id, collections=run))

    # Search more than one collection, in which two have the right
    # dataset type and another does not.
    registry.registerRun("empty")
    self.load_data(butler, "datasets.yaml")
    bias_g = find_detector2("bias", collections=["imported_g"])
    self.assertIsNotNone(bias_g)
    bias_r = find_detector2("bias", collections=["imported_r"])
    self.assertIsNotNone(bias_r)
    for expected, searched in (
        (bias_g, ["empty", "imported_g", "imported_r"]),
        (bias_r, ["empty", "imported_r", "imported_g"]),
    ):
        self.assertEqual(expected, find_detector2("bias", collections=searched))

    # If the input data ID was an expanded DataCoordinate with records,
    # then the output ref has records, too.
    expanded_id = registry.expandDataId({"instrument": "Cam1", "detector": 2})
    expanded_ref = registry.findDataset("bias", expanded_id, collections=["imported_r"])
    self.assertTrue(expanded_ref.dataId.hasRecords())

    # Search more than one collection, with one of them a CALIBRATION
    # collection.
    registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
    validity_begin = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
    validity_end = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
    timespan = Timespan(begin=validity_begin, end=validity_end)
    registry.certify("Cam1/calib", [bias_r], timespan=timespan)
    for expected, searched in (
        (bias_g, ["empty", "imported_g", "Cam1/calib"]),
        # Calibration dataset type, with no calibration collection, but
        # a timespan was provided.
        (bias_g, ["imported_g"]),
        (bias_r, ["empty", "Cam1/calib", "imported_g"]),
    ):
        self.assertEqual(expected, find_detector2("bias", collections=searched, timespan=timespan))

    # If we try to search those same collections without a timespan, it
    # should still work, since the CALIBRATION collection is ignored.
    for searched in (
        ["empty", "imported_g", "Cam1/calib"],
        ["empty", "Cam1/calib", "imported_g"],
    ):
        self.assertEqual(bias_g, find_detector2("bias", collections=searched))
    self.assertIsNone(find_detector2("bias", collections=["Cam1/calib"]))

    # Test non-calibration dataset type.
    registry.registerDatasetType(
        DatasetType("noncalibration", ["instrument", "detector"], "int", universe=butler.dimensions)
    )
    (non_calibration_ref,) = registry.insertDatasets("noncalibration", dataIds=[detector2_id], run=run)
    self.assertIsNone(find_detector2("noncalibration", collections=["imported_g"]))
    self.assertEqual(non_calibration_ref, find_detector2("noncalibration", collections=[run]))
    # Timespan parameter is ignored for non-calibration dataset types.
    self.assertIsNone(find_detector2("noncalibration", collections=["imported_g"], timespan=timespan))
    for searched in ([run], ["Cam1/calib", run]):
        self.assertEqual(
            non_calibration_ref,
            find_detector2("noncalibration", collections=searched, timespan=timespan),
        )

    # Add a dataset type whose dimension group involves an "implied"
    # dimension. ("physical_filter" implies "band".)
    implied_type = DatasetType(
        "dt_with_implied",
        ["instrument", "physical_filter"],
        "int",
        universe=butler.dimensions,
    )
    registry.registerDatasetType(implied_type)
    filter_id = {"instrument": "Cam1", "physical_filter": "Cam1-G"}
    (implied_ref,) = registry.insertDatasets("dt_with_implied", dataIds=[filter_id], run=run)
    found_ref = registry.findDataset("dt_with_implied", filter_id, collections=[run])
    self.assertEqual(implied_ref, found_ref)
    # The "full" data ID with implied values is looked up, even though we
    # provided only the "required" values.
    self.assertTrue(found_ref.dataId.hasFull())

    # The search ignores excess data ID values beyond the 'required' set.
    # This is not the correct band value for this physical_filter, but
    # the mismatch is ignored.
    wrong_band_id = {"instrument": "Cam1", "physical_filter": "Cam1-G", "band": "r"}
    self.assertEqual(
        implied_ref,
        registry.findDataset("dt_with_implied", wrong_band_id, collections=[run]),
    )
    # Correct band value, wrong physical_filter.
    wrong_filter_id = {"instrument": "Cam1", "physical_filter": "Cam1-R1", "band": "g"}
    self.assertIsNone(
        registry.findDataset("dt_with_implied", wrong_filter_id, collections=[run]),
    )

785 

def testRemoveDatasetTypeSuccess(self):
    """Check that `SqlRegistry.removeDatasetType` works when there are no
    datasets of that type present.
    """
    butler = self.make_butler()
    registry = butler.registry
    # Only "base.yaml" is loaded here, not "datasets.yaml".
    self.load_data(butler, "base.yaml")

    removed_name = "flat"
    registry.removeDatasetType(removed_name)
    # The removed dataset type can no longer be looked up.
    with self.assertRaises(MissingDatasetTypeError):
        registry.getDatasetType(removed_name)

796 

def testRemoveDatasetTypeFailure(self):
    """Check that `SqlRegistry.removeDatasetType` raises when there are
    datasets of that type present or if the dataset type is for a
    component.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml")

    # Datasets of this type exist, so removal would orphan them.
    with self.assertRaises(OrphanedRecordError):
        registry.removeDatasetType("flat")

    # Component dataset types cannot be removed.
    with self.assertRaises(DatasetTypeError):
        component_name = DatasetType.nameWithComponent("flat", "image")
        registry.removeDatasetType(component_name)

809 

def testImportDatasetsUUID(self):
    """Exercise `SqlRegistry._importDatasets` with UUID dataset IDs."""
    # The manager setting is either a class name string or a dict holding
    # the class name under "cls"; skip unless it names the UUID manager.
    if isinstance(self.datasetsManager, str):
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
    elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
        ".ByDimensionsDatasetRecordStorageManagerUUID"
    ):
        self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml")
    # Runs "run0" .. "run5" are used by the cases below.
    for run in range(6):
        registry.registerRun(f"run{run}")
    bias_type = registry.getDatasetType("bias")
    flat_type = registry.getDatasetType("flat")
    bias_id_det1 = {"instrument": "Cam1", "detector": 1}
    bias_id_det2 = {"instrument": "Cam1", "detector": 2}
    flat_id_det1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

    ref = DatasetRef(bias_type, bias_id_det1, run="run0")
    (ref1,) = registry._importDatasets([ref], assume_new=True)
    # The dataset ID of the input ref is preserved by the import.
    self.assertEqual(ref.id, ref1.id)

    # The dataset now exists, so a second insert that claims it is new
    # must be rejected.
    with self.assertRaises(ConflictingDefinitionError):
        registry._importDatasets([ref], assume_new=True)

    # Each of these conflicts with the dataset imported above.
    conflicting_refs = (
        # Same dataset type and data ID, but a different dataset ID.
        DatasetRef(bias_type, bias_id_det1, run="run0"),
        # Same dataset ID, but a different data ID or dataset type.
        DatasetRef(bias_type, bias_id_det2, id=ref1.id, run="run0"),
        DatasetRef(flat_type, flat_id_det1, id=ref1.id, run="run0"),
        # Same dataset type, data ID and dataset ID, but a different run.
        DatasetRef(bias_type, bias_id_det1, id=ref1.id, run="run1"),
    )
    for conflicting in conflicting_refs:
        with self.assertRaises(ConflictingDefinitionError):
            registry._importDatasets([conflicting])

    # Reproducible (non-unique) IDs can be re-imported multiple times.
    id_mode_cases = ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN))
    for run, idGenMode in id_mode_cases:
        with self.subTest(idGenMode=repr(idGenMode)):
            # Build a ref whose dataset ID is generated reproducibly.
            ref = DatasetRef(bias_type, bias_id_det1, run=f"run{run}", id_generation_mode=idGenMode)
            (ref1,) = registry._importDatasets([ref])
            self.assertIsInstance(ref1.id, uuid.UUID)
            self.assertEqual(ref1.id.version, 5)
            self.assertEqual(ref1.id, ref.id)

            # A repeated import of the same ref is accepted.
            (ref2,) = registry._importDatasets([ref1])
            self.assertEqual(ref2.id, ref1.id)

            # The same ID cannot be imported into a different run.
            ref = DatasetRef(bias_type, bias_id_det1, id=ref1.id, run=f"run{run + 1}")
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

            ref = DatasetRef(bias_type, bias_id_det1, run=f"run{run + 1}", id_generation_mode=idGenMode)
            if idGenMode is not DatasetIdGenEnum.DATAID_TYPE:
                # A DATAID_TYPE_RUN ref can be imported into a new run.
                (ref2,) = registry._importDatasets([ref])
            else:
                # The same DATAID_TYPE ref cannot go into a new run.
                with self.assertRaises(ConflictingDefinitionError):
                    (ref2,) = registry._importDatasets([ref])

884 

def testComponentLookups(self):
    """Test searching for component datasets via their parents.

    The registry no longer finds components; this test verifies that
    such a search now raises.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml")
    collection = "imported_g"
    # The child dataset type can still be retrieved from the registry and
    # must agree with the one produced by DatasetRef.makeComponentRef.
    parent_type = registry.getDatasetType("bias")
    child_type = registry.getDatasetType("bias.wcs")
    parent_ref = registry.findDataset(parent_type, collections=collection, instrument="Cam1", detector=1)
    self.assertIsInstance(parent_ref, DatasetRef)
    component_ref = parent_ref.makeComponentRef("wcs")
    self.assertEqual(child_type, component_ref.datasetType)
    # Looking up a single component dataset with findDataset is an error.
    with self.assertRaises(DatasetTypeError):
        registry.findDataset("bias.wcs", collections=collection, dataId=parent_ref.dataId)

908 

def testCollections(self):
    """Tests for registry methods that manage collections.

    This is one sequential scenario: it sets collection documentation,
    associates and disassociates datasets with a TAGGED collection, builds
    and queries (nested) CHAINED collections, and finally removes the
    collections. Later steps rely on the state left by earlier ones.
    """
    butler = self.make_butler()
    registry = butler.registry
    other_registry = butler.clone().registry
    self.load_data(butler, "base.yaml", "datasets.yaml")
    run1 = "imported_g"
    run2 = "imported_r"
    # Test setting a collection docstring after it has been created.
    registry.setCollectionDocumentation(run1, "doc for run1")
    self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
    registry.setCollectionDocumentation(run1, None)
    self.assertIsNone(registry.getCollectionDocumentation(run1))
    datasetType = "bias"
    # Find some datasets via their run's collection.
    dataId1 = {"instrument": "Cam1", "detector": 1}
    ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
    self.assertIsNotNone(ref1)
    dataId2 = {"instrument": "Cam1", "detector": 2}
    ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
    self.assertIsNotNone(ref2)
    # Associate those into a new collection, then look for them there.
    tag1 = "tag1"
    registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
    # Check that we can query for old and new collections by type.
    self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
    self.assertEqual(
        set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
        {tag1, run1, run2},
    )
    self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
    registry.associate(tag1, [ref1, ref2])
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
    # Disassociate one and verify that we can't find it there anymore...
    registry.disassociate(tag1, [ref1])
    self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
    # ...but we can still find ref2 in tag1, and ref1 in the run.
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
    collections = set(registry.queryCollections())
    self.assertEqual(collections, {run1, run2, tag1})
    # Associate both refs into tag1 again; ref2 is already there, but that
    # should be a harmless no-op.
    registry.associate(tag1, [ref1, ref2])
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
    # Get a different dataset (from a different run) that has the same
    # dataset type and data ID as ref2.
    ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
    self.assertNotEqual(ref2, ref2b)
    # Attempting to associate that into tag1 should be an error.
    with self.assertRaises(ConflictingDefinitionError):
        registry.associate(tag1, [ref2b])
    # That error shouldn't have messed up what we had before.
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
    # Attempt to associate the conflicting dataset again, this time with
    # a dataset that isn't in the collection and won't cause a conflict.
    # Should also fail without modifying anything.
    dataId3 = {"instrument": "Cam1", "detector": 3}
    ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
    with self.assertRaises(ConflictingDefinitionError):
        registry.associate(tag1, [ref3, ref2b])
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
    self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
    # Register a chained collection that will later search [tag1, run2].
    chain1 = "chain1"
    registry.registerCollection(chain1, type=CollectionType.CHAINED)
    self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
    # Chained collection exists, but has no collections in it.
    self.assertFalse(registry.getCollectionChain(chain1))
    # If we query for all collections, we should get the chained collection
    # if we don't ask to flatten it (i.e. yield only its children) or if we
    # explicitly ask to include it too.
    self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
    self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
    self.assertEqual(
        set(registry.queryCollections(flattenChains=True, includeChains=True)), {tag1, run1, run2, chain1}
    )
    # Attempt to set its child collections to something circular; that
    # should fail.
    with self.assertRaises(ValueError):
        registry.setCollectionChain(chain1, [tag1, chain1])
    # Add the child collections.
    registry.setCollectionChain(chain1, [tag1, run2])
    self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
    self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
    self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
    # Refresh the other registry that points to the same repo, and make
    # sure it can see the things we've done (note that this does require
    # an explicit refresh(); that's the documented behavior, because
    # caching is ~impossible otherwise).
    if other_registry is not None:
        other_registry.refresh()
        self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
    # Searching for dataId1 or dataId2 in the chain should return ref1 and
    # ref2, because both are in tag1.
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
    # Now disassociate ref2 from tag1. The search (for bias) with
    # dataId2 in chain1 should then:
    # 1. not find it in tag1
    # 2. find a different dataset in run2
    registry.disassociate(tag1, [ref2])
    ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
    self.assertNotEqual(ref2b, ref2)
    self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
    # Define a new chain so we can test recursive chains.
    chain2 = "chain2"
    registry.registerCollection(chain2, type=CollectionType.CHAINED)
    registry.setCollectionChain(chain2, [run2, chain1])
    self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
    self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})

    if self.supportsCollectionRegex:
        # Query for collections matching a regex.
        with self.assertWarns(FutureWarning):
            self.assertCountEqual(
                list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
                ["imported_r", "imported_g"],
            )
        # Query for collections matching a regex or an explicit str.
        with self.assertWarns(FutureWarning):
            self.assertCountEqual(
                list(
                    registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)
                ),
                ["imported_r", "imported_g", "chain1"],
            )
    # Same queries as the regex ones above, but using globs instead of
    # regex.
    self.assertCountEqual(
        list(registry.queryCollections("imported_*", flattenChains=False)),
        ["imported_r", "imported_g"],
    )
    # Query for collections matching a glob or an explicit str.
    self.assertCountEqual(
        list(registry.queryCollections(["imported_*", "chain1"], flattenChains=False)),
        ["imported_r", "imported_g", "chain1"],
    )
    # Query for collection matching chain names, by flattening it should
    # only return non-chain names.
    self.assertCountEqual(list(registry.queryCollections("chain?", flattenChains=True)), [tag1, run2])
    # Query for collection matching chain name, by flattening and
    # asking to include chains it should return everything.
    self.assertCountEqual(
        list(registry.queryCollections("chain*", flattenChains=True, includeChains=True)),
        [tag1, run2, chain1, chain2],
    )
    # Order of children in chained collections is preserved.
    self.assertEqual(list(registry.queryCollections("chain1", flattenChains=True)), [tag1, run2])
    self.assertEqual(list(registry.queryCollections("cha*2", flattenChains=True)), [run2, tag1])
    self.assertEqual(
        list(registry.queryCollections("chain1", flattenChains=True, includeChains=True)),
        [chain1, tag1, run2],
    )
    self.assertEqual(
        list(registry.queryCollections("chain2", flattenChains=True, includeChains=True)),
        [chain2, run2, chain1, tag1],
    )

    # Search for bias with dataId1 should find it via tag1 in chain2,
    # recursing, because it is not in run2.
    self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
    # Search for bias with dataId2 should find it in run2 (ref2b).
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
    # Search for a flat that is in run2. Searching chain2 (which is
    # [run2, chain1], with run2 also at the end of chain1) must return
    # the same dataset.
    dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
    ref4 = registry.findDataset("flat", dataId4, collections=run2)
    self.assertIsNotNone(ref4)
    self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
    # Deleting a collection that's part of a CHAINED collection is not
    # allowed, and is exception-safe.
    with self.assertRaises(sqlalchemy.exc.IntegrityError):
        registry.removeCollection(run2)
    self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
    with self.assertRaises(sqlalchemy.exc.IntegrityError):
        registry.removeCollection(chain1)
    self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
    # Actually remove chain2, test that it's gone by asking for its type.
    registry.removeCollection(chain2)
    with self.assertRaises(MissingCollectionError):
        registry.getCollectionType(chain2)
    # Actually remove run2 and chain1, which should work now.
    registry.removeCollection(chain1)
    registry.removeCollection(run2)
    with self.assertRaises(MissingCollectionError):
        registry.getCollectionType(run2)
    with self.assertRaises(MissingCollectionError):
        registry.getCollectionType(chain1)
    # Remove tag1 as well, just to test that we can remove TAGGED
    # collections.
    registry.removeCollection(tag1)
    with self.assertRaises(MissingCollectionError):
        registry.getCollectionType(tag1)

1111 

def test_collection_clearing(self) -> None:
    """Check that TAGGED and CALIBRATION collections can be deleted
    while datasets are still associated with them.
    """
    butler = self.make_butler()
    self.load_data(butler, "base.yaml", "datasets.yaml")

    # The query returns datasets of two different types that share the
    # same dimension group.
    original_datasets = tuple(butler.query_all_datasets("imported_r", instrument="Cam1", detector=2))
    self.assertEqual(len(original_datasets), 2)

    # TAGGED collections: fill two, remove the first one.
    for tagged_name in ("tag1", "tag2"):
        butler.collections.register(tagged_name, CollectionType.TAGGED)
    for tagged_name in ("tag1", "tag2"):
        butler.registry.associate(tagged_name, original_datasets)
    butler.collections.x_remove("tag1")
    with self.assertRaises(MissingCollectionError):
        butler.collections.get_info("tag1")
    # The sibling collection tag2 must be unaffected by the removal.
    surviving_tagged = set(butler.query_all_datasets("tag2"))
    self.assertEqual(surviving_tagged, set(original_datasets))

    # CALIBRATION collections: same pattern, using certify.
    for calib_name in ("calib1", "calib2"):
        butler.collections.register(calib_name, CollectionType.CALIBRATION)
    for calib_name in ("calib1", "calib2"):
        butler.registry.certify(calib_name, original_datasets, Timespan(None, None))
    butler.collections.x_remove("calib1")
    with self.assertRaises(MissingCollectionError):
        butler.collections.get_info("calib1")
    # The sibling collection calib2 must be unaffected by the removal.
    surviving_calib = set(butler.query_all_datasets("calib2"))
    self.assertEqual(surviving_calib, set(original_datasets))

1147 

def testCollectionChainCaching(self):
    """Check that a collection chain can be modified and read back while
    a registry caching context is active.
    """
    registry = self.make_butler().registry
    chain_name = "chain"
    children = ["a"]
    with registry.caching_context():
        registry.registerCollection("a")
        registry.registerCollection(chain_name, CollectionType.CHAINED)
        # Regression check for DM-43750: modifying the chain of a
        # collection that was already cached used to raise an exception.
        registry.setCollectionChain(chain_name, ["a"])
        stored_chain = list(registry.getCollectionChain(chain_name))
        self.assertEqual(stored_chain, children)

1159 

def testCollectionChainFlatten(self):
    """Check that `SqlRegistry.setCollectionChain` honors its 'flatten'
    option.
    """
    registry = self.make_butler().registry
    # Build outer -> inner -> innermost, where only innermost is a RUN.
    registry.registerCollection("inner", CollectionType.CHAINED)
    registry.registerCollection("innermost", CollectionType.RUN)
    registry.setCollectionChain("inner", ["innermost"])
    registry.registerCollection("outer", CollectionType.CHAINED)

    # Without flattening, the nested chain is stored as given.
    registry.setCollectionChain("outer", ["inner"], flatten=False)
    unflattened = list(registry.getCollectionChain("outer"))
    self.assertEqual(unflattened, ["inner"])

    # With flattening, the nested chain is replaced by its children.
    registry.setCollectionChain("outer", ["inner"], flatten=True)
    flattened = list(registry.getCollectionChain("outer"))
    self.assertEqual(flattened, ["innermost"])

1174 

def testCollectionChainPrependConcurrency(self):
    """Check that database row locks serialize two concurrent prepends
    to the same collection chain.
    """

    def prepend_a(butler: Butler):
        # Run as the blocked thread: it stalls after choosing positions
        # for the new children in the chain, but before inserting them.
        butler.collections.prepend_chain("chain", ["a"])

    def prepend_b(butler: Butler):
        butler.collections.prepend_chain("chain", ["b"])

    registry = self._do_collection_concurrency_test(prepend_a, prepend_b)

    # The blocked thread finishes first and inserts "a"; the other thread
    # finishes second and prepends "b" in front of it.
    expected_chain = ("b", "a")
    self.assertEqual(expected_chain, registry.getCollectionChain("chain"))

1194 

def testCollectionChainReplaceConcurrency(self):
    """Check that database row locks serialize two concurrent
    redefinitions of the same collection chain.
    """

    def redefine_as_a(butler: Butler):
        # Run as the blocked thread: it stalls after deleting the old
        # children, but before inserting the new ones.
        butler.collections.redefine_chain("chain", ["a"])

    def redefine_as_b(butler: Butler):
        butler.collections.redefine_chain("chain", ["b"])

    registry = self._do_collection_concurrency_test(redefine_as_a, redefine_as_b)

    # The blocked thread finishes first; the other thread finishes second
    # and overwrites the chain with "b".
    expected_chain = ("b",)
    self.assertEqual(expected_chain, registry.getCollectionChain("chain"))

1214 

def testCollectionChainRemoveConcurrency(self):
    """Check the outcome of a chain-member removal running concurrently
    with a redefinition of the same collection chain.
    """

    def remove_b(butler: Butler):
        # Run as the blocked thread: it stalls after taking the lock, but
        # before deleting the children.
        butler.collections.remove_from_chain("chain", ["b"])

    def redefine_as_b_a(butler: Butler):
        butler.collections.redefine_chain("chain", ["b", "a"])

    registry = self._do_collection_concurrency_test(remove_b, redefine_as_b_a)

    # The blocked thread finishes first, removing "b"; the other thread
    # finishes second and puts "b" back.
    expected_chain = ("b", "a")
    self.assertEqual(expected_chain, registry.getCollectionChain("chain"))

1229 

def _do_collection_concurrency_test(
    self, blocked_thread_func: Callable[[Butler], None], unblocked_thread_func: Callable[[Butler], None]
) -> SqlRegistry:
    """Run two collection-chain operations concurrently against one
    database and return the first registry for inspection.

    The steps are:

    1. Set up two butlers (and registries) pointing at the same database.
    2. Start running ``blocked_thread_func`` in a background thread,
       arranging for it to become blocked during a critical section in
       the collections manager.
    3. Wait for ``blocked_thread_func`` to reach the critical section.
    4. Start running ``unblocked_thread_func`` in a second thread.
    5. Allow both functions to run to completion.

    Parameters
    ----------
    blocked_thread_func : `~collections.abc.Callable`
        Called with the first butler; it is paused inside the critical
        section until the second thread has been started.
    unblocked_thread_func : `~collections.abc.Callable`
        Called with the second butler once the first is paused.

    Returns
    -------
    registry : `SqlRegistry`
        Registry of the first butler. The collections "chain" (CHAINED),
        "a" and "b" have been registered in it.

    Raises
    ------
    unittest.SkipTest
        Raised if the database looks like an in-memory database.
    """
    # Set up two registries pointing to the same DB
    butler1 = self.make_butler()
    butler2 = butler1.clone()
    registry1 = butler1._registry
    assert isinstance(registry1, SqlRegistry)
    registry2 = butler2._registry

    # A registry without a ``_db`` attribute raises AttributeError here,
    # which is suppressed so the test proceeds.
    with contextlib.suppress(AttributeError):
        if ":memory:" in str(registry2._db):
            raise unittest.SkipTest("Testing concurrency requires two connections to the same DB.")

    registry1.registerCollection("chain", CollectionType.CHAINED)
    for collection in ["a", "b"]:
        registry1.registerCollection(collection)

    # Arrange for registry1 to block during its critical section, allowing
    # us to detect this and control when it becomes unblocked.
    enter_barrier = Barrier(2, timeout=60)
    exit_barrier = Barrier(2, timeout=60)

    def wait_for_barrier():
        enter_barrier.wait()
        exit_barrier.wait()

    registry1._managers.collections._block_for_concurrency_test = wait_for_barrier

    with ThreadPoolExecutor(max_workers=1) as exec1, ThreadPoolExecutor(max_workers=1) as exec2:
        future1 = exec1.submit(blocked_thread_func, butler1)
        enter_barrier.wait()

        # At this point registry 1 has entered the critical section and
        # is waiting for us to release it. Start the other thread.
        future2 = exec2.submit(unblocked_thread_func, butler2)
        # thread2 should block inside a database call, but we have no
        # way to detect when it is in this state.
        time.sleep(0.200)

        # Let the threads run to completion.
        exit_barrier.wait()
        # result() re-raises any exception raised in the worker threads.
        future1.result()
        future2.result()

    return registry1

1286 

def testBasicTransaction(self):
    """Check that every operation inside a single transaction block is
    rolled back when an exception propagates out of that block.
    """
    butler = self.make_butler()
    registry = butler.registry
    storage_class = StorageClass("testDatasetType")
    registry.storageClasses.registerStorageClass(storage_class)
    # This transaction completes normally, so Cam1 is committed.
    with registry.transaction():
        registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
    # This transaction is aborted by the exception, so Cam2 is discarded.
    with self.assertRaises(ValueError), registry.transaction():
        registry.insertDimensionData("instrument", {"name": "Cam2"})
        raise ValueError("Oops, something went wrong")
    # Cam1 must exist with the class name it was inserted with.
    cam1_record = registry.expandDataId(instrument="Cam1").records["instrument"]
    self.assertEqual(cam1_record.class_name, "A")
    # Neither Cam2 (rolled back) nor Cam3 (never inserted) may exist.
    for missing_instrument in ("Cam2", "Cam3"):
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument=missing_instrument)

1308 

def testNestedTransaction(self):
    """Check that work done in an outer transaction block survives when
    an inner block raises and the exception is caught.
    """
    butler = self.make_butler()
    registry = butler.registry
    dimension = registry.dimensions["instrument"]
    kept_data_id = {"instrument": "DummyCam"}
    discarded_data_id = {"instrument": "DummyCam2"}
    reached_checkpoint = False
    with registry.transaction():
        # Inserted in the outer block; expected to be committed in the end.
        registry.insertDimensionData(dimension, kept_data_id)
        with self.assertRaises(sqlalchemy.exc.IntegrityError), registry.transaction(savepoint=True):
            # No conflict here: the insert succeeds, but it must not
            # end up committed.
            registry.insertDimensionData(dimension, discarded_data_id)
            reached_checkpoint = True
            # Re-inserting the first record conflicts and raises. That
            # rolls back the insert just above, in the same savepoint,
            # but not the insertion made in the outer block.
            registry.insertDimensionData(dimension, kept_data_id)
        self.assertTrue(reached_checkpoint)
    self.assertIsNotNone(registry.expandDataId(kept_data_id, dimensions=dimension.minimal_group))
    with self.assertRaises(DataIdValueError):
        registry.expandDataId(discarded_data_id, dimensions=dimension.minimal_group)

1338 

1339 def testInstrumentDimensions(self): 

1340 """Test queries involving only instrument dimensions, with no joins to 

1341 skymap. 

1342 """ 

1343 butler = self.make_butler() 

1344 registry = butler.registry 

1345 

1346 # need a bunch of dimensions and datasets for test 

1347 registry.insertDimensionData( 

1348 "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6) 

1349 ) 

1350 registry.insertDimensionData("day_obs", dict(instrument="DummyCam", id=20250101)) 

1351 registry.insertDimensionData( 

1352 "physical_filter", 

1353 dict(instrument="DummyCam", name="dummy_r", band="r"), 

1354 dict(instrument="DummyCam", name="dummy_i", band="i"), 

1355 ) 

1356 registry.insertDimensionData( 

1357 "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)] 

1358 ) 

1359 registry.insertDimensionData( 

1360 "visit", 

1361 dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", day_obs=20250101), 

1362 dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", day_obs=20250101), 

1363 dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", day_obs=20250101), 

1364 ) 

1365 registry.insertDimensionData( 

1366 "group", 

1367 dict(instrument="DummyCam", name="ten"), 

1368 dict(instrument="DummyCam", name="eleven"), 

1369 dict(instrument="DummyCam", name="twelve"), 

1370 ) 

1371 for i in range(1, 6): 

1372 registry.insertDimensionData( 

1373 "visit_detector_region", 

1374 dict(instrument="DummyCam", visit=10, detector=i), 

1375 dict(instrument="DummyCam", visit=11, detector=i), 

1376 dict(instrument="DummyCam", visit=20, detector=i), 

1377 ) 

1378 registry.insertDimensionData( 

1379 "exposure", 

1380 dict( 

1381 instrument="DummyCam", 

1382 id=100, 

1383 obs_id="100", 

1384 physical_filter="dummy_i", 

1385 group="ten", 

1386 day_obs=20250101, 

1387 ), 

1388 dict( 

1389 instrument="DummyCam", 

1390 id=101, 

1391 obs_id="101", 

1392 physical_filter="dummy_i", 

1393 group="ten", 

1394 day_obs=20250101, 

1395 ), 

1396 dict( 

1397 instrument="DummyCam", 

1398 id=110, 

1399 obs_id="110", 

1400 physical_filter="dummy_r", 

1401 group="eleven", 

1402 day_obs=20250101, 

1403 ), 

1404 dict( 

1405 instrument="DummyCam", 

1406 id=111, 

1407 obs_id="111", 

1408 physical_filter="dummy_r", 

1409 group="eleven", 

1410 day_obs=20250101, 

1411 ), 

1412 dict( 

1413 instrument="DummyCam", 

1414 id=200, 

1415 obs_id="200", 

1416 physical_filter="dummy_r", 

1417 group="twelve", 

1418 day_obs=20250101, 

1419 ), 

1420 dict( 

1421 instrument="DummyCam", 

1422 id=201, 

1423 obs_id="201", 

1424 physical_filter="dummy_r", 

1425 group="twelve", 

1426 day_obs=20250101, 

1427 ), 

1428 ) 

1429 registry.insertDimensionData( 

1430 "visit_definition", 

1431 dict(instrument="DummyCam", exposure=100, visit=10), 

1432 dict(instrument="DummyCam", exposure=101, visit=10), 

1433 dict(instrument="DummyCam", exposure=110, visit=11), 

1434 dict(instrument="DummyCam", exposure=111, visit=11), 

1435 dict(instrument="DummyCam", exposure=200, visit=20), 

1436 dict(instrument="DummyCam", exposure=201, visit=20), 

1437 ) 

1438 # dataset types 

1439 run1 = "test1_r" 

1440 run2 = "test2_r" 

1441 tagged2 = "test2_t" 

1442 registry.registerRun(run1) 

1443 registry.registerRun(run2) 

1444 registry.registerCollection(tagged2) 

1445 storageClass = StorageClass("testDataset") 

1446 registry.storageClasses.registerStorageClass(storageClass) 

1447 rawType = DatasetType( 

1448 name="RAW", 

1449 dimensions=registry.dimensions.conform(("instrument", "exposure", "detector")), 

1450 storageClass=storageClass, 

1451 ) 

1452 registry.registerDatasetType(rawType) 

1453 calexpType = DatasetType( 

1454 name="CALEXP", 

1455 dimensions=registry.dimensions.conform(("instrument", "visit", "detector")), 

1456 storageClass=storageClass, 

1457 ) 

1458 registry.registerDatasetType(calexpType) 

1459 

1460 # add pre-existing datasets 

1461 for exposure in (100, 101, 110, 111): 

1462 for detector in (1, 2, 3): 

1463 # note that only 3 of 5 detectors have datasets 

1464 dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector) 

1465 (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1) 

1466 # exposures 100 and 101 appear in both run1 and tagged2. 

1467 # 100 has different datasets in the different collections 

1468 # 101 has the same dataset in both collections. 

1469 if exposure == 100: 

1470 (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2) 

1471 if exposure in (100, 101): 

1472 registry.associate(tagged2, [ref]) 

1473 # Add pre-existing datasets to tagged2. 

1474 for exposure in (200, 201): 

1475 for detector in (3, 4, 5): 

1476 # note that only 3 of 5 detectors have datasets 

1477 dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector) 

1478 (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2) 

1479 registry.associate(tagged2, [ref]) 

1480 

1481 dimensions = registry.dimensions.conform(rawType.dimensions.required | calexpType.dimensions.required) 

1482 # Test that single dim string works as well as list of str 

1483 rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet() 

1484 rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet() 

1485 self.assertEqual(rows, rowsI) 

1486 # with empty expression 

1487 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet() 

1488 self.assertEqual(len(rows), 4 * 3) # 4 exposures times 3 detectors 

1489 self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111)) 

1490 self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11)) 

1491 self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3)) 

1492 

1493 # second collection 

1494 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet() 

1495 self.assertEqual(len(rows), 4 * 3) # 4 exposures times 3 detectors 

1496 for dataId in rows: 

1497 self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit")) 

1498 self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201)) 

1499 self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20)) 

1500 self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5)) 

1501 

1502 # with two input datasets 

1503 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet() 

1504 self.assertEqual(len(set(rows)), 6 * 3) # 6 exposures times 3 detectors; set needed to de-dupe 

1505 for dataId in rows: 

1506 self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit")) 

1507 self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201)) 

1508 self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20)) 

1509 self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5)) 

1510 

1511 # limit to single visit 

1512 rows = registry.queryDataIds( 

1513 dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam" 

1514 ).toSet() 

1515 self.assertEqual(len(rows), 2 * 3) # 2 exposures times 3 detectors 

1516 self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101)) 

1517 self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,)) 

1518 self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3)) 

1519 

1520 # more limiting expression, using link names instead of Table.column 

1521 rows = registry.queryDataIds( 

1522 dimensions, 

1523 datasets=rawType, 

1524 collections=run1, 

1525 where="visit = 10 and detector > 1 and 'DummyCam'=instrument", 

1526 ).toSet() 

1527 self.assertEqual(len(rows), 2 * 2) # 2 exposures times 2 detectors 

1528 self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101)) 

1529 self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,)) 

1530 self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3)) 

1531 

1532 # queryDataIds with only one of `datasets` and `collections` is an 

1533 # error. 

1534 with self.assertRaises(CollectionError): 

1535 registry.queryDataIds(dimensions, datasets=rawType) 

1536 with self.assertRaises(ArgumentError): 

1537 registry.queryDataIds(dimensions, collections=run1) 

1538 

1539 # expression excludes everything 

1540 rows = registry.queryDataIds( 

1541 dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam" 

1542 ).toSet() 

1543 self.assertEqual(len(rows), 0) 

1544 

1545 # Selecting by physical_filter, this is not in the dimensions, but it 

1546 # is a part of the full expression so it should work too. 

1547 rows = registry.queryDataIds( 

1548 dimensions, 

1549 datasets=rawType, 

1550 collections=run1, 

1551 where="physical_filter = 'dummy_r'", 

1552 instrument="DummyCam", 

1553 ).toSet() 

1554 self.assertEqual(len(rows), 2 * 3) # 2 exposures times 3 detectors 

1555 self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111)) 

1556 self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,)) 

1557 self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3)) 

1558 

def testSkyMapDimensions(self):
    """Test data ID queries that involve only skymap dimensions.

    No joins to instrument dimensions are needed to answer these queries;
    instrument and physical_filter records are inserted only because the
    dataset types used here include the "band" dimension.
    """
    butler = self.make_butler()
    registry = butler.registry

    # Dimension records: one instrument with two physical filters (to make
    # bands "r" and "i" available), and one skymap holding 10 tracts with
    # 10 patches each.
    registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
    registry.insertDimensionData(
        "physical_filter",
        dict(instrument="DummyCam", name="dummy_r", band="r"),
        dict(instrument="DummyCam", name="dummy_i", band="i"),
    )
    registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!"))
    for tract in range(10):
        registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
        registry.insertDimensionData(
            "patch",
            *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
        )

    # Dataset types, all sharing one storage class and one run.
    run = "tésτ"
    registry.registerRun(run)
    storageClass = StorageClass("testDataset")
    registry.storageClasses.registerStorageClass(storageClass)
    calexpType = DatasetType(
        name="deepCoadd_calexp",
        dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
        storageClass=storageClass,
    )
    registry.registerDatasetType(calexpType)
    mergeType = DatasetType(
        name="deepCoadd_mergeDet",
        dimensions=registry.dimensions.conform(("skymap", "tract", "patch")),
        storageClass=storageClass,
    )
    registry.registerDatasetType(mergeType)
    measType = DatasetType(
        name="deepCoadd_meas",
        dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
        storageClass=storageClass,
    )
    registry.registerDatasetType(measType)

    # Query dimensions are the union of the required dimensions of all
    # three dataset types.
    dimensions = registry.dimensions.conform(
        calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
    )

    # Pre-existing datasets: for each of 3 tracts x 4 patches, one
    # mergeDet dataset plus one calexp dataset per band.
    for tract in (1, 3, 5):
        for patch in (2, 4, 6, 7):
            dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
            registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
            for aFilter in ("i", "r"):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

    # with empty expression
    rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
    self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
    for dataId in rows:
        self.assertCountEqual(dataId.dimensions.required, ("skymap", "tract", "patch", "band"))
    self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
    self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
    self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

    # limit to 2 tracts and 2 patches
    rows = registry.queryDataIds(
        dimensions,
        datasets=[calexpType, mergeType],
        collections=run,
        where="tract IN (1, 5) AND patch IN (2, 7)",
        skymap="DummyMap",
    ).toSet()
    self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
    self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5))
    self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7))
    self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

    # limit to single filter
    rows = registry.queryDataIds(
        dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
    ).toSet()
    self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
    self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
    self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
    self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",))

    # A constraint on a skymap name that was never inserted yields an
    # empty result.
    rows = registry.queryDataIds(
        dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
    ).toSet()
    self.assertEqual(len(rows), 0)

1655 

def testSpatialJoin(self):
    """Compare spatial-overlap data ID queries against overlaps computed
    by brute force from the stored regions.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "spatial.yaml")

    # Spatial DatabaseDimensionElements grouped by the name of the
    # TopologicalFamily they belong to; every element of one family gets
    # related to every element of each other family.
    families = defaultdict(set)
    # Mapping of {element.name: {dataId: region}}.
    regions = {}
    for element in registry.dimensions.database_elements:
        if element.spatial is None:
            continue
        families[element.spatial.name].add(element)
        regions[element.name] = {
            record.dataId: record.region for record in registry.queryDimensionRecords(element)
        }

    # A failure here is not necessarily a problem - it may only reflect a
    # reasonable change to the default dimension definitions - but the
    # checks below need more than one family to do anything useful.
    self.assertEqual(len(families), 2)

    # Overlap DatabaseDimensionElements with each other.
    for family1, family2 in itertools.combinations(families, 2):
        for element1, element2 in itertools.product(families[family1], families[family2]):
            dimensions = element1.minimal_group | element2.minimal_group
            # Brute-force the expected overlaps from the regions fetched
            # above.
            expected = set()
            for dataId1, region1 in regions[element1.name].items():
                for dataId2, region2 in regions[element2.name].items():
                    if region1.isDisjointFrom(region2):
                        continue
                    expected.add(
                        DataCoordinate.standardize(
                            {**dataId1.required, **dataId2.required}, dimensions=dimensions
                        )
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(dimensions))
            self.assertEqual(expected, queried)

    # Overlap each DatabaseDimensionElement with the commonSkyPix system.
    commonSkyPix = registry.dimensions.commonSkyPix
    for elementName, these_regions in regions.items():
        dimensions = registry.dimensions[elementName].minimal_group | commonSkyPix.minimal_group
        # Every pixel index inside the envelope of a region is expected to
        # pair with that region's data ID.
        expected = {
            DataCoordinate.standardize({commonSkyPix.name: index, **dataId.required}, dimensions=dimensions)
            for dataId, region in these_regions.items()
            for begin, end in commonSkyPix.pixelization.envelope(region)
            for index in range(begin, end)
        }
        self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
        queried = set(registry.queryDataIds(dimensions))
        self.assertEqual(expected, queried)

1716 

def testAbstractQuery(self):
    """Test a query that only lists the known bands.

    This is tricky because band is backed by a query against
    physical_filter.
    """
    butler = self.make_butler()
    registry = butler.registry
    registry.insertDimensionData("instrument", dict(name="DummyCam"))
    # Two physical filters share band "i", so the band query has to
    # collapse them into a single row.
    filter_records = [
        dict(instrument="DummyCam", name=filter_name, band=band)
        for filter_name, band in (("dummy_i", "i"), ("dummy_i2", "i"), ("dummy_r", "r"))
    ]
    registry.insertDimensionData("physical_filter", *filter_records)
    found = registry.queryDataIds(["band"]).toSet()
    wanted = [DataCoordinate.standardize(band=band, universe=registry.dimensions) for band in ("i", "r")]
    self.assertCountEqual(found, wanted)

1739 

def testAttributeManager(self):
    """Exercise get, set, delete and items on the attribute manager."""
    # Attributes holding schema versions in a fresh database: 6 managers
    # with 2 records per manager, plus config for dimensions.
    VERSION_COUNT = 6 * 2 + 1

    butler = self.make_butler()
    registry = butler._registry
    attributes = registry._managers.attributes

    def stored_count():
        """Return how many attributes are currently stored."""
        return len(list(attributes.items()))

    # A missing key yields None, or the default when one is supplied.
    self.assertIsNone(attributes.get("attr"))
    for fallback in ("", "Value"):
        self.assertEqual(attributes.get("attr", fallback), fallback)
    self.assertEqual(stored_count(), VERSION_COUNT)

    # Neither an empty key nor an empty value can be stored.
    for bad_key, bad_value in (("", "value"), ("attr", "")):
        with self.assertRaises(ValueError):
            attributes.set(bad_key, bad_value)

    # Setting a key that does not exist yet adds one record.
    attributes.set("attr", "value")
    self.assertEqual(stored_count(), VERSION_COUNT + 1)
    self.assertEqual(attributes.get("attr"), "value")

    # Overwriting an existing key is refused unless forced.
    with self.assertRaises(ButlerAttributeExistsError):
        attributes.set("attr", "value2")

    attributes.set("attr", "value2", force=True)
    self.assertEqual(stored_count(), VERSION_COUNT + 1)
    self.assertEqual(attributes.get("attr"), "value2")

    # Deleting an existing key reports success and removes the record.
    self.assertTrue(attributes.delete("attr"))
    self.assertEqual(stored_count(), VERSION_COUNT)

    # Deleting a key that does not exist reports failure.
    self.assertFalse(attributes.delete("non-attr"))

    # Store several keys and read them all back through items().
    data = [
        ("version.core", "1.2.3"),
        ("version.dimensions", "3.2.1"),
        ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
    ]
    for name, text in data:
        attributes.set(name, text)
    stored = dict(attributes.items())
    for name, text in data:
        self.assertEqual(stored[name], text)

1793 

def testQueryDatasetsDeduplication(self):
    """Test that the findFirst option to queryDatasets selects datasets
    from collections in the order given.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml")

    def find_biases(*keys):
        """Look up one bias per (detector, collection) pair."""
        return [
            registry.findDataset("bias", instrument="Cam1", detector=detector, collections=collection)
            for detector, collection in keys
        ]

    # Without findFirst every bias in both collections is returned.
    found = list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"]))
    self.assertCountEqual(
        found,
        find_biases(
            (1, "imported_g"),
            (2, "imported_g"),
            (3, "imported_g"),
            (2, "imported_r"),
            (3, "imported_r"),
            (4, "imported_r"),
        ),
    )

    # With findFirst, imported_g is searched first.
    found = list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True))
    self.assertCountEqual(
        found,
        find_biases(
            (1, "imported_g"),
            (2, "imported_g"),
            (3, "imported_g"),
            (4, "imported_r"),
        ),
    )

    # Reversing the collection order makes imported_r take precedence.
    found = list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True))
    self.assertCountEqual(
        found,
        find_biases(
            (1, "imported_g"),
            (2, "imported_r"),
            (3, "imported_r"),
            (4, "imported_r"),
        ),
    )

    # Collection wildcards are not allowed in find-first searches because
    # they do not guarantee the ordering of collections.
    with self.assertRaises(TypeError):
        registry.queryDatasets("bias", collections="imported_*", findFirst=True)

1835 

def testQueryDatasetsExtraDimensions(self):
    """Test queryDatasets with extra dimensions that are not part of the
    dataset type being searched for.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml")

    def query_biases(band):
        """Query all collections for biases, constrained by a band."""
        return registry.queryDatasets(
            "bias",
            collections=...,
            dimensions=["physical_filter"],
            dataId={
                "instrument": "Cam1",
                "band": band,
            },
        )

    # The bias dataset type does not include physical filter. Adding
    # "physical_filter" to the dimensions effectively searches for bias
    # datasets whose instrument has a specific filter available, even
    # though that filter has nothing to do with the biases being found.
    self.assertEqual(0, query_biases("not_a_real_band").count())
    self.assertEqual(6, len(set(query_biases("r"))))

1873 

def testQueryResults(self):
    """Test querying for data IDs and then using the returned QueryResults
    object to run further queries.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml")
    bias = registry.getDatasetType("bias")
    flat = registry.getDatasetType("flat")

    # Expected results are obtained through methods other than the ones
    # under test. First, the dimensions of the data IDs to query...
    expected_dimensions = registry.dimensions.conform(["detector", "physical_filter"])
    # ...and the dimensions of the data IDs extracted from those.
    expected_subset_dimensions = registry.dimensions.conform(["detector"])
    # The data IDs expected from the first queries.
    expectedDataIds = DataCoordinateSet(
        {
            DataCoordinate.standardize(
                instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
            )
            for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
        },
        dimensions=expected_dimensions,
        hasFull=False,
        hasRecords=False,
    )
    # The flat datasets expected for those data IDs, in just one collection
    # (so deduplication is irrelevant).
    expectedFlats = [
        registry.findDataset(
            flat, instrument="Cam1", detector=detector, physical_filter=filter_name, collections="imported_r"
        )
        for detector, filter_name in ((1, "Cam1-R1"), (2, "Cam1-R1"), (3, "Cam1-R2"))
    ]
    # The data IDs expected once physical_filter is projected away.
    expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions)
    # The bias datasets expected for the subset data IDs, with duplicates...
    expectedAllBiases = [
        registry.findDataset(bias, instrument="Cam1", detector=detector, collections=collection)
        for detector, collection in (
            (1, "imported_g"),
            (2, "imported_g"),
            (3, "imported_g"),
            (2, "imported_r"),
            (3, "imported_r"),
        )
    ]
    # ...and without duplicates.
    expectedDeduplicatedBiases = [
        registry.findDataset(bias, instrument="Cam1", detector=detector, collections=collection)
        for detector, collection in ((1, "imported_g"), (2, "imported_r"), (3, "imported_r"))
    ]

    def check_full(results):
        """Check full data ID results and the flats found from them."""
        self.assertEqual(results.dimensions, expected_dimensions)
        self.assertEqual(results.toSet(), expectedDataIds)
        self.assertCountEqual(
            list(results.findDatasets(flat, collections=["imported_r"])),
            expectedFlats,
        )

    def check_subset(results):
        """Check detector-only data ID results and the biases found from
        them, first with duplicates and then find-first.
        """
        self.assertEqual(results.dimensions, expected_subset_dimensions)
        self.assertEqual(results.toSet(), expectedSubsetDataIds)
        for find_first, expected_refs in (
            (False, expectedAllBiases),
            (True, expectedDeduplicatedBiases),
        ):
            self.assertCountEqual(
                list(
                    results.findDatasets(
                        bias, collections=["imported_r", "imported_g"], findFirst=find_first
                    )
                ),
                expected_refs,
            )

    # Test against the expected results with a "lazy" data ID query (one
    # that re-executes each time it is used to do something new).
    dataIds = registry.queryDataIds(
        ["detector", "physical_filter"],
        where="detector.purpose = 'SCIENCE'",  # this rejects detector=4
        instrument="Cam1",
    )
    check_full(dataIds)
    subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
    check_subset(subsetDataIds)

    # Searching for a dataset with dimensions we had projected away
    # restores those dimensions.
    self.assertCountEqual(
        list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)),
        expectedFlats,
    )

    # Use a dataset type that does not exist, passed both as a dataset
    # type object and as a string name.
    unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")
    test_type: str | DatasetType
    for test_type, test_type_name in (
        (unknown_type, unknown_type.name),
        (unknown_type.name, unknown_type.name),
    ):
        with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
            list(
                subsetDataIds.findDatasets(
                    test_type, collections=["imported_r", "imported_g"], findFirst=True
                )
            )

    # Materialize the data ID subset query, but not the dataset queries.
    with subsetDataIds.materialize() as materializedSubset:
        check_subset(materializedSubset)

    # Materialize the original query, but none of the follow-up queries.
    with dataIds.materialize() as materializedDataIds:
        check_full(materializedDataIds)
        derivedSubset = materializedDataIds.subset(expected_subset_dimensions, unique=True)
        check_subset(derivedSubset)
        # Materialize the subset data ID query, but not the dataset
        # queries.
        with derivedSubset.materialize() as materializedDerivedSubset:
            check_subset(materializedDerivedSubset)

2055 

def testStorageClassPropagation(self):
    """Test that dataset queries honor the storage class carried by a full
    dataset type passed in by the caller.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml")
    universe = registry.dimensions

    # Register "tbl" with one storage class and insert a single dataset.
    registered_type = DatasetType(
        "tbl", dimensions=["instrument"], storageClass="Packages", universe=universe
    )
    registry.registerDatasetType(registered_type)
    run = "run1"
    registry.registerRun(run)
    (inserted_ref,) = registry.insertDatasets(
        registered_type, [registry.expandDataId(instrument="Cam1")], run=run
    )
    self.assertEqual(inserted_ref.datasetType, registered_type)

    # Same name and dimensions, different storage class.
    requested_type = DatasetType(
        "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=universe
    )
    self.assertNotEqual(registered_type, requested_type)

    # queryDatasets reports the requested dataset type.
    dataset_results = registry.queryDatasets(requested_type, collections=[run])
    self.assertEqual(dataset_results.parentDatasetType, requested_type)  # type: ignore
    (queried_ref,) = dataset_results
    self.assertEqual(queried_ref.datasetType, requested_type)

    # So does findDatasets on data ID query results.
    data_id_results = registry.queryDataIds(["instrument"])
    found_results = data_id_results.findDatasets(requested_type, collections=[run])
    self.assertEqual(found_results.parentDatasetType, requested_type)
    (found_ref,) = found_results
    self.assertEqual(found_ref.datasetType, requested_type)

    # And queryDatasetTypes and findDataset.
    type_results = registry.queryDatasetTypes(requested_type)
    self.assertEqual(list(type_results), [requested_type])
    single_ref = registry.findDataset(requested_type, instrument="Cam1", collections=[run])
    self.assertEqual(single_ref.datasetType, requested_type)

2091 

def testEmptyDimensionsQueries(self):
    """Test Query and QueryResults objects when the data IDs involved have
    no dimensions.
    """
    # Test data: one dataset type, two runs, one dataset in each run.
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml")
    schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
    registry.registerDatasetType(schema)
    dataId = DataCoordinate.make_empty(registry.dimensions)
    run1 = "run1"
    run2 = "run2"
    registry.registerRun(run1)
    registry.registerRun(run2)
    (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
    (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)

    def check_find_datasets(results, *, include_all=True):
        """Check findDatasets on empty-data-ID results: optionally the
        search returning both datasets, then find-first in both collection
        orders.
        """
        if include_all:
            self.checkQueryResults(
                results.findDatasets(schema, collections=[run1, run2], findFirst=False),
                [dataset1, dataset2],
            )
        self.checkQueryResults(
            results.findDatasets(schema, collections=[run1, run2], findFirst=True),
            [dataset1],
        )
        self.checkQueryResults(
            results.findDatasets(schema, collections=[run2, run1], findFirst=True),
            [dataset2],
        )

    # Query directly for both of the datasets, and each one, one at a time.
    self.checkQueryResults(
        registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
    )
    self.checkQueryResults(
        registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
        [dataset1],
    )
    self.checkQueryResults(
        registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
        [dataset2],
    )

    # Query for data IDs with no dimensions, then use them to find the
    # datasets.
    dataIds = registry.queryDataIds([])
    self.checkQueryResults(dataIds, [dataId])
    check_find_datasets(dataIds)
    # Now materialize the data ID query results and repeat; only the
    # find-first searches are repeated for this case.
    with dataIds.materialize() as materialized:
        self.checkQueryResults(materialized, [dataId])
        check_find_datasets(materialized, include_all=False)

    # Query for non-empty data IDs, then subset that to get the empty one.
    # Repeat the above tests starting from that.
    dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
    self.checkQueryResults(dataIds, [dataId])
    check_find_datasets(dataIds)
    with dataIds.materialize() as materialized:
        self.checkQueryResults(materialized, [dataId])
        check_find_datasets(materialized)

    # Query for non-empty data IDs, then materialize, then subset to get
    # the empty one. Repeat again.
    with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
        dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
        self.checkQueryResults(dataIds, [dataId])
        check_find_datasets(dataIds)
        with dataIds.materialize() as materialized:
            self.checkQueryResults(materialized, [dataId])
            check_find_datasets(materialized)

    # Query for non-empty data IDs with a constraint on an empty-data-ID
    # dataset that exists.
    dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
    self.checkQueryResults(
        dataIds.subset(unique=True),
        [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
    )

    # Again query for non-empty data IDs with a constraint on empty-data-ID
    # datasets, but when the datasets don't exist. We delete the existing
    # dataset and query just that collection rather than creating a new
    # empty collection because this is a bit less likely for our build-time
    # logic to shortcut-out (via the collection summaries), and such a
    # shortcut would make this test a bit more trivial than we'd like.
    registry.removeDatasets([dataset2])
    dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
    self.checkQueryResults(dataIds, [])

2225 

def testDimensionDataModifications(self):
    """Verify in-place modification of dimension records.

    Records are changed with ``syncDimensionData(..., update=True)`` and
    ``insertDimensionData(..., replace=True)`` while a dataset refers to
    them and while spatial overlap relationships exist for them; the
    overlaps reported by queries must follow each change.
    """

    def _unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
        """Yield each integer covered by a sphgeom.RangeSet."""
        for start, stop in ranges:
            yield from range(start, stop)

    def _range_set_hull(
        ranges: lsst.sphgeom.RangeSet,
        pixelization: lsst.sphgeom.HtmPixelization,
    ) -> lsst.sphgeom.ConvexPolygon:
        """Return the ConvexPolygon hull of the trixels selected by a set
        of HTM pixelization index ranges.
        """
        vertices = [
            vertex
            for pixel in _unpack_range_set(ranges)
            for vertex in pixelization.triangle(pixel).getVertices()
        ]
        return lsst.sphgeom.ConvexPolygon(vertices)

    def _hull_of(regions):
        """Return the ConvexPolygon built from every vertex of ``regions``."""
        return lsst.sphgeom.ConvexPolygon(
            [vertex for region in regions for vertex in region.getVertices()]
        )

    butler = self.make_butler()
    registry = butler.registry
    htm6 = registry.dimensions.skypix["htm"][6].pixelization
    commonSkyPix = registry.dimensions.commonSkyPix.pixelization
    skymap_name = "testing_v1"

    def _touches_only_large(pixel, large, small):
        """Report whether a commonSkyPix pixel overlaps ``large`` while
        being disjoint from ``small``.
        """
        trixel = commonSkyPix.triangle(pixel)
        return trixel.isDisjointFrom(small) and not trixel.isDisjointFrom(large)

    def _difference_pixels(large, small):
        """List the commonSkyPix indices in the envelope of ``large`` that
        are absent from the envelope of ``small``.
        """
        return list(_unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))

    def _patch_records(regions):
        """Build one patch record per region, laid out on a 2x2 grid."""
        return [
            {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
            for n, c in enumerate(regions)
        ]

    def _query_overlapping(dimension, pixel):
        """Query data IDs of ``dimension`` constrained by one commonSkyPix
        pixel index.
        """
        return registry.queryDataIds(
            [dimension],
            skymap=skymap_name,
            dataId={registry.dimensions.commonSkyPix.name: pixel},
        )

    def _assert_large_overlaps_present():
        """Assert that the tract and every patch are found via a pixel
        that only the "large" version of their region reaches.
        """
        self.assertEqual(
            {0},
            {data_id["tract"] for data_id in _query_overlapping("tract", parent_difference_indices[0])},
        )
        for patch_id, patch_difference_indices in enumerate(child_difference_indices):
            self.assertIn(
                patch_id,
                {
                    data_id["patch"]
                    for data_id in _query_overlapping("patch", patch_difference_indices[0])
                },
            )

    # Use HTM to set up an initial parent region (one arbitrary trixel)
    # and four child regions (the trixels within the parent at the next
    # level). The parent plays the role of a tract/visit region and the
    # children the role of its patch/visit_detector regions.
    parent_trixel_index = 12288
    child_ranges_small = lsst.sphgeom.RangeSet(parent_trixel_index).scaled(4)
    assert htm6.universe().contains(child_ranges_small)
    child_regions_small = [htm6.triangle(i) for i in _unpack_range_set(child_ranges_small)]
    parent_region_small = _hull_of(child_regions_small)
    assert all(parent_region_small.contains(c) for c in child_regions_small)

    # Enlarge each child region to the set of htm6 trixels that overlap
    # the original's bounding circle, and build a new parent as the convex
    # hull of the enlarged children.
    child_regions_large = [
        _range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
    ]
    assert all(
        large.contains(small)
        for large, small in zip(child_regions_large, child_regions_small, strict=True)
    )
    parent_region_large = _hull_of(child_regions_large)
    assert all(parent_region_large.contains(c) for c in child_regions_large)
    assert parent_region_large.contains(parent_region_small)
    assert not parent_region_small.contains(parent_region_large)
    assert not all(parent_region_small.contains(c) for c in child_regions_large)

    # Find commonSkyPix indices that overlap the large regions but not the
    # small ones. commonSkyPix is used so that the real tests below depend
    # on what is in the database, not just on post-query region filtering.
    child_difference_indices = []
    for large, small in zip(child_regions_large, child_regions_small, strict=True):
        difference = _difference_pixels(large, small)
        assert difference, "if this is empty, we can't test anything useful with these regions"
        assert all(_touches_only_large(d, large, small) for d in difference)
        child_difference_indices.append(difference)
    parent_difference_indices = _difference_pixels(parent_region_large, parent_region_small)
    assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
    assert all(
        _touches_only_large(d, parent_region_large, parent_region_small)
        for d in parent_difference_indices
    )

    # With the regions in hand, insert the large ones as tract/patch
    # dimension records.
    registry.insertDimensionData(
        "skymap",
        {
            "name": skymap_name,
            "hash": bytes([42]),
            "tract_max": 1,
            "patch_nx_max": 2,
            "patch_ny_max": 2,
        },
    )
    registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
    registry.insertDimensionData("patch", *_patch_records(child_regions_large))

    # Add a dataset that uses these dimensions to make sure that modifying
    # them doesn't disrupt foreign keys (the DB must not implement insert
    # with replace=True as delete-then-insert).
    dataset_type = DatasetType(
        "coadd",
        dimensions=["tract", "patch"],
        universe=registry.dimensions,
        storageClass="Exposure",
    )
    registry.registerDatasetType(dataset_type)
    registry.registerCollection("the_run", CollectionType.RUN)
    registry.insertDatasets(
        dataset_type,
        [{"skymap": skymap_name, "tract": 0, "patch": 2}],
        run="the_run",
    )

    # The database holds the "large" suite of regions, so tracts and
    # patches must overlap the "difference" pixels.
    _assert_large_overlaps_present()

    # Switch to the "small" suite: sync updates the tract region, insert
    # with replace=True updates the patch regions.
    updated = registry.syncDimensionData(
        "tract",
        {"skymap": skymap_name, "id": 0, "region": parent_region_small},
        update=True,
    )
    self.assertEqual(updated, {"region": parent_region_large})
    registry.insertDimensionData("patch", *_patch_records(child_regions_small), replace=True)

    # The database now holds the "small" suite, so those overlaps must be
    # gone.
    self.assertFalse(set(_query_overlapping("tract", parent_difference_indices[0])))
    for patch_id, patch_difference_indices in enumerate(child_difference_indices):
        self.assertNotIn(
            patch_id,
            {data_id["patch"] for data_id in _query_overlapping("patch", patch_difference_indices[0])},
        )

    # Switch back to the large regions and query one more time.
    updated = registry.syncDimensionData(
        "tract",
        {"skymap": skymap_name, "id": 0, "region": parent_region_large},
        update=True,
    )
    self.assertEqual(updated, {"region": parent_region_small})
    registry.insertDimensionData("patch", *_patch_records(child_regions_large), replace=True)
    _assert_large_overlaps_present()

2451 

def testCalibrationCollections(self):
    """Exercise `~CollectionType.CALIBRATION` collections.

    Covers `SqlRegistry.certify`, `SqlRegistry.decertify`,
    `SqlRegistry.findDataset` with a timespan, and
    `DataCoordinateQueryResults.findRelatedDatasets`.
    """
    # Setup: make a Registry and fill it with some datasets in
    # non-calibration collections.
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml")

    # Five hourly timestamps, plus every timespan that can be formed from
    # them and the two unbounded ends.
    instants = [
        astropy.time.Time(f"2020-01-01T0{hour}:00:00", format="isot", scale="tai") for hour in range(1, 6)
    ]
    t1, t2, t3, t4, t5 = instants
    allTimespans = [
        Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
    ]

    # Insert exposure records whose timespans lie between each sequential
    # pair of timestamps.
    registry.insertDimensionData(
        "day_obs", {"instrument": "Cam1", "id": 20200101, "timespan": Timespan(t1, t5)}
    )
    registry.insertDimensionData(
        "group",
        *[{"instrument": "Cam1", "name": f"group{n}"} for n in range(4)],
    )
    obs_ids = ("zero", "one", "two", "three")
    registry.insertDimensionData(
        "exposure",
        *[
            {
                "instrument": "Cam1",
                "id": n,
                "group": f"group{n}",
                "obs_id": obs_id,
                "physical_filter": "Cam1-G",
                "day_obs": 20200101,
                "timespan": Timespan(begin, end),
            }
            for n, (obs_id, (begin, end)) in enumerate(
                zip(obs_ids, itertools.pairwise(instants), strict=True)
            )
        ],
    )

    # Get references to some datasets.
    bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
    bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
    bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
    bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")

    # Register the main calibration collection we'll be working with.
    collection = "Cam1/calibs/default"
    registry.registerCollection(collection, type=CollectionType.CALIBRATION)

    # Cannot associate into a calibration collection (no timespan).
    with self.assertRaises(CollectionTypeError):
        registry.associate(collection, [bias2a])

    # Certify 2a dataset with [t2, t4) validity.
    registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))

    # The dataset must be queryable via the new collection, both on its
    # own and together with a RUN collection.
    self.assertEqual(
        set(registry.queryDatasets("bias", findFirst=False, collections=collection)),
        {bias2a},
    )
    found_refs = set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"]))
    bias4b = registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r")
    self.assertEqual(found_refs, {bias2a, bias2b, bias3b, bias4b})
    self.assertEqual(
        set(registry.queryDataIds("detector", datasets="bias", collections=collection)),
        {registry.expandDataId(instrument="Cam1", detector=2)},
    )
    self.assertEqual(
        set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])),
        {registry.expandDataId(instrument="Cam1", detector=detector) for detector in (2, 3, 4)},
    )
    related = set(
        registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets(
            "bias", findFirst=True, collections=[collection]
        )
    )
    self.assertEqual(
        related,
        {
            (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
            (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
        },
    )

    # We should not be able to certify 2b with anything overlapping that
    # window.
    conflicting_bounds = [
        (None, t3),
        (None, t5),
        (t1, t3),
        (t1, t5),
        (t1, None),
        (t2, t3),
        (t2, t5),
        (t2, None),
    ]
    for begin, end in conflicting_bounds:
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=begin, end=end))

    # Certifying 3a over a range overlapping that window is fine, because
    # it's for a different detector. We'll certify 3a over [t1, t3).
    registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
    # Now we'll certify 2b and 3b together over [t4, ∞).
    registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))

    # The four certifications now present in the calibration collection.
    expected_datasets = (
        DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
        DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
        DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
        DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
    )

    # Fetch all associations and check that they are what we expect.
    associations = list(
        registry.queryDatasetAssociations(
            "bias",
            collections=[collection, "imported_g", "imported_r"],
        )
    )
    run_associations = [
        DatasetAssociation(
            ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
            collection="imported_g",
            timespan=None,
        ),
        DatasetAssociation(
            ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            collection="imported_r",
            timespan=None,
        ),
        DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
        DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
        DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
        DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
    ]
    self.assertCountEqual(associations, [*run_associations, *expected_datasets])

    # Test dataset association query against a chained collection.
    # This is a regression test for DM-53179, as well as verification
    # that the flattenChains parameter has never had any effect.
    butler.collections.register("chain", CollectionType.CHAINED)
    butler.collections.redefine_chain("chain", [collection])
    for flatten in (False, True):
        self.assertCountEqual(
            list(registry.queryDatasetAssociations("bias", collections=["chain"], flattenChains=flatten)),
            expected_datasets,
        )

    class Ambiguous:
        """Tag class to denote lookups that should be ambiguous."""

    def _assertLookup(
        detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None
    ) -> None:
        """Assert that a bias lookup in the calibration collection gives
        the expected result, or raises when ``expected`` is `Ambiguous`.
        """

        def _find():
            return registry.findDataset(
                "bias",
                collections=collection,
                instrument="Cam1",
                detector=detector,
                timespan=timespan,
            )

        if expected is not Ambiguous:
            self.assertEqual(expected, _find())
            return
        with self.assertRaises((DatasetTypeError, LookupError)):
            _find()

    def _assertLookups(detector, cases):
        """Run `_assertLookup` for each ``(begin, end, expected)`` case,
        in the order given.
        """
        for begin, end, expected in cases:
            _assertLookup(detector=detector, timespan=Timespan(begin, end), expected=expected)

    # Systematically test lookups against expected results.
    _assertLookups(
        2,
        [
            (None, t1, None),
            (None, t2, None),
            (None, t3, bias2a),
            (None, t4, bias2a),
            (None, t5, Ambiguous),
            (None, None, Ambiguous),
            (t1, t2, None),
            (t1, t3, bias2a),
            (t1, t4, bias2a),
            (t1, t5, Ambiguous),
            (t1, None, Ambiguous),
            (t2, t3, bias2a),
            (t2, t4, bias2a),
            (t2, t5, Ambiguous),
            (t2, None, Ambiguous),
            (t3, t4, bias2a),
            (t3, t5, Ambiguous),
            (t3, None, Ambiguous),
            (t4, t5, bias2b),
            (t4, None, bias2b),
            (t5, None, bias2b),
        ],
    )
    _assertLookups(
        3,
        [
            (None, t1, None),
            (None, t2, bias3a),
            (None, t3, bias3a),
            (None, t4, bias3a),
            (None, t5, Ambiguous),
            (None, None, Ambiguous),
            (t1, t2, bias3a),
            (t1, t3, bias3a),
            (t1, t4, bias3a),
            (t1, t5, Ambiguous),
            (t1, None, Ambiguous),
            (t2, t3, bias3a),
            (t2, t4, bias3a),
            (t2, t5, Ambiguous),
            (t2, None, Ambiguous),
            (t3, t4, None),
            (t3, t5, bias3b),
            (t3, None, bias3b),
            (t4, t5, bias3b),
            (t4, None, bias3b),
            (t5, None, bias3b),
        ],
    )

    # Test lookups via temporal joins to exposures.
    self.assertEqual(
        set(
            registry.queryDataIds(
                ["exposure", "detector"], instrument="Cam1", detector=2
            ).findRelatedDatasets("bias", collections=[collection])
        ),
        {
            (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
            (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
            (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
        },
    )
    self.assertEqual(
        set(
            registry.queryDataIds(
                ["exposure", "detector"], instrument="Cam1", detector=3
            ).findRelatedDatasets("bias", collections=[collection])
        ),
        {
            (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
            (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
            (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
        },
    )

    # Decertify [t3, t5) for all data IDs, and do test lookups again.
    # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
    # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
    registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
    _assertLookups(
        2,
        [
            (None, t1, None),
            (None, t2, None),
            (None, t3, bias2a),
            (None, t4, bias2a),
            (None, t5, bias2a),
            (None, None, Ambiguous),
            (t1, t2, None),
            (t1, t3, bias2a),
            (t1, t4, bias2a),
            (t1, t5, bias2a),
            (t1, None, Ambiguous),
            (t2, t3, bias2a),
            (t2, t4, bias2a),
            (t2, t5, bias2a),
            (t2, None, Ambiguous),
            (t3, t4, None),
            (t3, t5, None),
            (t3, None, bias2b),
            (t4, t5, None),
            (t4, None, bias2b),
            (t5, None, bias2b),
        ],
    )
    _assertLookups(
        3,
        [
            (None, t1, None),
            (None, t2, bias3a),
            (None, t3, bias3a),
            (None, t4, bias3a),
            (None, t5, bias3a),
            (None, None, Ambiguous),
            (t1, t2, bias3a),
            (t1, t3, bias3a),
            (t1, t4, bias3a),
            (t1, t5, bias3a),
            (t1, None, Ambiguous),
            (t2, t3, bias3a),
            (t2, t4, bias3a),
            (t2, t5, bias3a),
            (t2, None, Ambiguous),
            (t3, t4, None),
            (t3, t5, None),
            (t3, None, bias3b),
            (t4, t5, None),
            (t4, None, bias3b),
            (t5, None, bias3b),
        ],
    )

    # Decertify everything, this time with explicit data IDs, then check
    # that no lookups succeed.
    registry.decertify(
        collection,
        "bias",
        Timespan(None, None),
        dataIds=[dict(instrument="Cam1", detector=detector) for detector in (2, 3)],
    )
    for detector in (2, 3):
        for timespan in allTimespans:
            _assertLookup(detector=detector, timespan=timespan, expected=None)

    # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
    # those.
    registry.certify(
        collection,
        [bias2a, bias3a],
        Timespan(None, None),
    )
    for timespan in allTimespans:
        _assertLookup(detector=2, timespan=timespan, expected=bias2a)
        _assertLookup(detector=3, timespan=timespan, expected=bias3a)

    # Decertify just bias2 over [t2, t4).
    # This should split a single certification row into two (and leave the
    # other existing row, for bias3a, alone).
    registry.decertify(
        collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
    )
    for timespan in allTimespans:
        _assertLookup(detector=3, timespan=timespan, expected=bias3a)
        # bias2a now has two validity ranges; a lookup that reaches both
        # is ambiguous, one that reaches exactly one finds bias2a, and
        # one that reaches neither finds nothing.
        hits_before = timespan.overlaps(Timespan(None, t2))
        hits_after = timespan.overlaps(Timespan(t4, None))
        if not (hits_before or hits_after):
            expected = None
        elif hits_before and hits_after:
            expected = Ambiguous
        else:
            expected = bias2a
        _assertLookup(detector=2, timespan=timespan, expected=expected)

2840 

def testSkipCalibs(self):
    """Check how dataset and data ID queries treat calibration
    collections that appear in the searched collections.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml")

    def _bias_count(collections):
        """Return how many "bias" datasets a query over ``collections``
        yields.
        """
        return len(list(registry.queryDatasets("bias", collections=collections)))

    coll_calib = "Cam1/calibs/default"
    registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)

    # Add all biases to the calibration collection.
    # Without this, the logic that prunes dataset subqueries based on
    # datasetType-collection summary information will fire before the logic
    # we want to test below. This is a good thing (it avoids the dreaded
    # NotImplementedError a bit more often) everywhere but here.
    all_biases = registry.queryDatasets("bias", collections=...)
    registry.certify(coll_calib, all_biases, Timespan(None, None))

    coll_list = [coll_calib, "imported_g", "imported_r"]
    chain = "Cam1/chain"
    registry.registerCollection(chain, type=CollectionType.CHAINED)
    registry.setCollectionChain(chain, coll_list)

    # Lookup is ambiguous due to multiple datasets with the same data ID
    # in the calibration collection.
    with self.assertRaises(CalibrationLookupError):
        list(registry.queryDatasets("bias", collections=coll_list, findFirst=True))

    # chain will skip
    self.assertGreater(_bias_count(chain), 0)

    dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
    self.assertGreater(len(dataIds), 0)

    # glob will skip too
    self.assertGreater(_bias_count("*d*"), 0)

    # regular expression will skip too
    if self.supportsCollectionRegex:
        pattern = re.compile(".*")
        with self.assertWarns(FutureWarning):
            regex_count = _bias_count(pattern)
        self.assertGreater(regex_count, 0)

    # ellipsis should work as usual
    self.assertGreater(_bias_count(...), 0)

    # New query system correctly determines that this search is
    # ambiguous, because there are multiple datasets with the same
    # {instrument=Cam1, detector=2} data ID in the calibration
    # collection at the beginning of the chain.
    with self.assertRaises(CalibrationLookupError):
        list(registry.queryDatasets("bias", collections=chain, findFirst=True))

2895 

def testIngestTimeQuery(self):
    """Test ``ingest_date`` constraints in ``queryDatasets`` expressions."""
    butler = self.make_butler()
    registry = butler.registry
    # Take a timestamp on either side of the data load so that the load
    # happens between the two.
    dt0 = datetime.datetime.now(datetime.UTC)
    self.load_data(butler, "base.yaml", "datasets.yaml")
    dt1 = datetime.datetime.now(datetime.UTC)

    def count_datasets(**kwargs):
        """Count datasets of all types in all collections.

        Parameters
        ----------
        **kwargs
            Extra keyword arguments forwarded to ``queryDatasets``
            (``where`` and ``bind`` in this test).
        """
        return len(list(registry.queryDatasets(..., collections=..., **kwargs)))

    len0 = count_datasets()
    self.assertGreater(len0, 0)

    # A cutoff far in the past keeps everything, whichever side of the
    # operator the column appears on.
    for where in ("ingest_date > T'2000-01-01'", "T'2000-01-01' < ingest_date"):
        self.assertEqual(len0, count_datasets(where=where))

    # A cutoff far in the future keeps nothing.
    for where in ("ingest_date > T'2050-01-01'", "T'2050-01-01' < ingest_date"):
        self.assertEqual(count_datasets(where=where), 0)

    # Check more exact timing to make sure there is no 37 seconds offset
    # (after fixing DM-30124). SQLite time precision is 1 second, so the
    # bracketing timestamps are widened by one second each way.
    margin = timedelta(seconds=1)
    earlier = dt0 - margin
    later = dt1 + margin
    # Each entry is (timestamp, operator, expected number of datasets).
    cases = [
        (earlier, ">", len0),
        (earlier, "<", 0),
        (later, "<", len0),
        (later, ">", 0),
    ]
    for dt, op, expect_len in cases:
        # Timestamp embedded in the expression as a T'...' literal.
        dt_str = dt.isoformat(sep=" ")
        self.assertEqual(count_datasets(where=f"ingest_date {op} T'{dt_str}'"), expect_len)

        # Same comparison with the timestamp supplied through ``bind``,
        # first as a datetime and then as an astropy Time.
        where = f"ingest_date {op} :ingest_time"
        self.assertEqual(count_datasets(where=where, bind={"ingest_time": dt}), expect_len)

        dt_astropy = astropy.time.Time(dt, format="datetime")
        self.assertEqual(count_datasets(where=where, bind={"ingest_time": dt_astropy}), expect_len)

2947 

def testTimespanQueries(self):
    """Test query expressions involving timespans."""
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "ci_hsc-subset.yaml")
    # Timespan of every visit record in the database, keyed by visit ID.
    visits = {}
    for record in registry.queryDimensionRecords("visit"):
        visits[record.id] = record.timespan
    # Sorted visit IDs; the ordering assertions below rely on this also
    # being the temporal order of the visits.
    ids = sorted(visits)
    self.assertEqual(len(ids), 11)
    # Pick some quasi-random indexes into `ids` to play with.
    i1, i2, i3, i4 = (int(len(ids) * fraction) for fraction in (0.1, 0.3, 0.6, 0.8))
    one_second = astropy.time.TimeDelta(1.0, format="sec")
    # t1: just before the beginning of visit i1, and checked to be after
    # the end of the visit that precedes it.
    t1 = visits[ids[i1]].begin - one_second
    self.assertGreater(t1, visits[ids[i1 - 1]].end)
    # t2: exactly the beginning of visit i2.
    t2 = visits[ids[i2]].begin
    # t3: just after the beginning of visit i3, checked to be before its
    # end.
    t3 = visits[ids[i3]].begin + one_second
    self.assertLess(t3, visits[ids[i3]].end)
    # t4: exactly the end of visit i4.
    t4 = visits[ids[i4]].end
    # Make sure those are actually in order.
    self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))

    bind = {
        "t1": t1,
        "t2": t2,
        "t3": t3,
        "t4": t4,
        "ts23": Timespan(t2, t3),
    }

    def query(where):
        """Return results as a sorted, deduplicated list of visit IDs.

        Parameters
        ----------
        where : `str`
            The WHERE clause for the query.
        """
        matched = set()
        for data_id in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where):
            matched.add(data_id["visit"])
        return sorted(matched)

    # t3 written as a literal in the expression language rather than
    # passed through ``bind``.
    t3_literal = f"T'{t3.tai.isot}/tai'"

    # Try a bunch of timespan queries, mixing up the bounds themselves,
    # where they appear in the expression, and how we get the timespan into
    # the expression.

    # t1 is before the start of i1, so this should not include i1.
    self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, :t1)"))
    # t2 is exactly at the start of i2, but ends are exclusive, so this
    # should not include i2.
    self.assertEqual(ids[i1:i2], query("(:t1, :t2) OVERLAPS visit.timespan"))
    # t3 is in the middle of i3, so this should include i3.
    self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS :ts23"))
    # t3 is inside i3 and t4 is exactly at the end of i4, so this should
    # include both i3 and i4.
    self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS ({t3_literal}, :t4)"))
    # i4's upper bound of t4 is exclusive, so this should not include i4.
    self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (:t4, NULL)"))

    # Now some timespan vs. time scalar queries.
    self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS :t3"))
    self.assertEqual(ids[i3 : i3 + 1], query(f"{t3_literal} OVERLAPS visit.timespan"))

    # Empty timespans should not overlap anything.
    self.assertEqual([], query("visit.timespan OVERLAPS (:t3, :t2)"))

    # Make sure that expanded data IDs include the timespans.
    expanded = registry.queryDataIds(["visit"], dataId={"instrument": "HSC", "visit": ids[1]}).expanded()
    results = list(expanded)
    self.assertEqual(len(results), 1)
    (data_id,) = results
    visit_timespan = visits[ids[1]]
    self.assertEqual(data_id.timespan, visit_timespan)
    visit_record = data_id.records["visit"]
    assert visit_record is not None
    self.assertEqual(visit_record.timespan, visit_timespan)
    day_obs_record = data_id.records["day_obs"]
    assert day_obs_record is not None
    self.assertEqual(day_obs_record.id, 20130617)
    expected_day = Timespan(
        astropy.time.Time("2013-06-17T00:00:00", scale="tai"),
        astropy.time.Time("2013-06-18T00:00:00", scale="tai"),
    )
    self.assertEqual(day_obs_record.timespan, expected_day)

3044 

def testCollectionSummaries(self):
    """Test recording and retrieval of collection summaries."""
    self.maxDiff = None
    butler = self.make_butler()
    registry = butler.registry
    # Importing datasets from yaml should go through the code path where
    # we update collection summaries as we insert datasets.
    self.load_data(butler, "base.yaml", "datasets.yaml")
    flat = registry.getDatasetType("flat")
    cam1_data_id = DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)
    # Summary expected for each imported run: bias and flat dataset types,
    # with the flat data ID supplying the Cam1 instrument value.
    expected1 = CollectionSummary()
    expected1.dataset_types.add(registry.getDatasetType("bias"))
    expected1.add_data_ids(flat, [cam1_data_id])
    for run in ("imported_g", "imported_r"):
        self.assertEqual(registry.getCollectionSummary(run), expected1)
    # Create a chained collection with both of the imported runs; the
    # summary should be the same, because it's a union with itself.
    chain = "chain"
    registry.registerCollection(chain, CollectionType.CHAINED)
    registry.setCollectionChain(chain, ["imported_r", "imported_g"])
    self.assertEqual(registry.getCollectionSummary(chain), expected1)
    # Associate flats only into a tagged collection and a calibration
    # collection to check summaries of those.
    tag = "tag"
    registry.registerCollection(tag, CollectionType.TAGGED)
    registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
    calibs = "calibs"
    registry.registerCollection(calibs, CollectionType.CALIBRATION)
    flats_in_g = registry.queryDatasets(flat, collections="imported_g")
    registry.certify(calibs, flats_in_g, timespan=Timespan(None, None))
    # Same as the run summary, minus the bias dataset type.
    expected2 = expected1.copy()
    expected2.dataset_types.discard("bias")
    for collection in (tag, calibs):
        self.assertEqual(registry.getCollectionSummary(collection), expected2)
    # Explicitly calling SqlRegistry.refresh() should load those same
    # summaries, via a totally different code path.
    registry.refresh()
    after_refresh = (
        ("imported_g", expected1),
        ("imported_r", expected1),
        (tag, expected2),
        (calibs, expected2),
    )
    for collection, expected in after_refresh:
        self.assertEqual(registry.getCollectionSummary(collection), expected)

3088 

def testBindInQueryDatasets(self):
    """Test that the bind parameter is correctly forwarded in
    queryDatasets recursion.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml")
    # The band constraint given as a keyword argument...
    via_keyword = set(registry.queryDatasets("flat", band="r", collections=...))
    # ...must select the same datasets as the same constraint written in
    # the ``where`` expression with its value supplied through ``bind``.
    via_bind = set(
        registry.queryDatasets("flat", where="band=:my_band", bind={"my_band": "r"}, collections=...)
    )
    self.assertEqual(via_keyword, via_bind)

3104 

def testQueryIntRangeExpressions(self):
    """Test integer range expressions in ``where`` arguments.

    Note that our expressions use inclusive stop values, unlike Python's.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml")
    # Each entry pairs a range expression with the detector IDs it is
    # expected to select; ``a..b:s`` steps from a to b (inclusive) by s.
    cases = (
        ("detector IN (1..2)", [1, 2]),
        ("detector IN (1..4:2)", [1, 3]),
        ("detector IN (2..4:2)", [2, 4]),
    )
    for where, detectors in cases:
        found = set(registry.queryDataIds(["detector"], instrument="Cam1", where=where))
        expected = {registry.expandDataId(instrument="Cam1", detector=n) for n in detectors}
        self.assertEqual(found, expected)

3125 

def testQueryResultSummaries(self):
    """Test summary methods like `count`, `any`, and `explain_no_results`
    on `DataCoordinateQueryResults` and `DatasetQueryResults`.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml", "spatial.yaml")
    # Default test dataset has two collections, each with both flats and
    # biases.  Add a new collection with only biases.
    registry.registerCollection("biases", CollectionType.TAGGED)
    registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
    # First query yields two results, and involves no postprocessing.
    query1 = registry.queryDataIds(["physical_filter"], band="r")
    self.assertTrue(query1.any(execute=False, exact=False))
    self.assertTrue(query1.any(execute=True, exact=False))
    self.assertTrue(query1.any(execute=True, exact=True))
    self.assertEqual(query1.count(exact=False), 2)
    self.assertEqual(query1.count(exact=True), 2)
    self.assertFalse(list(query1.explain_no_results()))
    # Second query should yield no results, which we should see when
    # we attempt to expand the data ID.
    query2 = registry.queryDataIds(["physical_filter"], band="h")
    # There's no execute=False, exact=False test here because the behavior
    # is not something we want to guarantee in this case (and exact=False
    # says either answer is legal).
    self.assertFalse(query2.any(execute=True, exact=False))
    self.assertFalse(query2.any(execute=True, exact=True))
    self.assertEqual(query2.count(exact=False), 0)
    self.assertEqual(query2.count(exact=True), 0)
    # These queries yield no results due to various problems that can be
    # spotted prior to execution, yielding helpful diagnostics.
    base_query = registry.queryDataIds(["detector", "physical_filter"])
    queries_and_snippets = [
        (
            # Dataset type name doesn't match any existing dataset types.
            registry.queryDatasets("nonexistent", collections=...),
            ["nonexistent"],
        ),
        (
            # Dataset type object isn't registered.
            registry.queryDatasets(
                DatasetType(
                    "nonexistent",
                    dimensions=["instrument"],
                    universe=registry.dimensions,
                    storageClass="Image",
                ),
                collections=...,
            ),
            ["nonexistent"],
        ),
        (
            # No datasets of this type in this collection.
            registry.queryDatasets("flat", collections=["biases"]),
            ["flat", "biases"],
        ),
        (
            # No datasets of this type in this collection.
            base_query.findDatasets("flat", collections=["biases"]),
            ["flat", "biases"],
        ),
        (
            # No collections matching at all.
            registry.queryDatasets("flat", collections="potato*"),
            ["potato"],
        ),
    ]
    with self.assertRaises(MissingDatasetTypeError):
        # Dataset type name doesn't match any existing dataset types.
        list(registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...))
    with self.assertRaises(MissingDatasetTypeError):
        # Dataset type name doesn't match any existing dataset types.
        registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...).any()
    with self.assertRaises(DatasetTypeExpressionError):
        # queryDimensionRecords does not allow dataset type wildcards.
        registry.queryDimensionRecords("detector", datasets=["f*"], collections=...).any()
    for query, snippets in queries_and_snippets:
        # Report each doomed query separately so that one failure does not
        # hide the others.
        with self.subTest(snippets=snippets):
            self.assertFalse(query.any(execute=False, exact=False))
            self.assertFalse(query.any(execute=True, exact=False))
            self.assertFalse(query.any(execute=True, exact=True))
            self.assertEqual(query.count(exact=False), 0)
            self.assertEqual(query.count(exact=True), 0)
            messages = list(query.explain_no_results())
            self.assertTrue(messages)
            # Want all expected snippets to appear in at least one message.
            # Check the list captured above, so that the messages shown on
            # failure are the ones that were actually examined.
            self.assertTrue(
                any(all(snippet in message for snippet in snippets) for message in messages),
                messages,
            )

    # Wildcards on dataset types are not permitted in queryDataIds.
    with self.assertRaises(DatasetTypeExpressionError):
        registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...)

    # This query yields four overlaps in the database, but one is filtered
    # out in postprocessing.  The count queries aren't accurate because
    # they don't account for duplication that happens due to an internal
    # join against commonSkyPix.
    query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
    self.assertEqual(
        {
            DataCoordinate.standardize(
                instrument="Cam1",
                skymap="SkyMap1",
                visit=v,
                tract=t,
                universe=registry.dimensions,
            )
            for v, t in [(1, 0), (2, 0), (2, 1)]
        },
        set(query3),
    )
    self.assertTrue(query3.any(execute=False, exact=False))
    self.assertTrue(query3.any(execute=True, exact=False))
    self.assertTrue(query3.any(execute=True, exact=True))
    self.assertGreaterEqual(query3.count(exact=False), 3)
    self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
    self.assertFalse(list(query3.explain_no_results()))
    # This query yields overlaps in the database, but all are filtered
    # out in postprocessing.  The count queries again aren't very useful.
    # We have to use `where=` here to avoid an optimization that
    # (currently) skips the spatial postprocess-filtering because it
    # recognizes that no spatial join is necessary.  That's not ideal, but
    # fixing it is out of scope for this ticket.
    query4 = registry.queryDataIds(
        ["visit", "tract"],
        instrument="Cam1",
        skymap="SkyMap1",
        where="visit=1 AND detector=1 AND tract=0 AND patch=4",
    )
    self.assertFalse(set(query4))
    self.assertTrue(query4.any(execute=False, exact=False))
    self.assertTrue(query4.any(execute=True, exact=False))
    self.assertFalse(query4.any(execute=True, exact=True))
    self.assertGreaterEqual(query4.count(exact=False), 1)
    self.assertEqual(query4.count(exact=True, discard=True), 0)
    # This query should yield results from one dataset type but not the
    # other, which is not registered.
    query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"])
    self.assertTrue(set(query5))
    self.assertTrue(query5.any(execute=False, exact=False))
    self.assertTrue(query5.any(execute=True, exact=False))
    self.assertTrue(query5.any(execute=True, exact=True))
    self.assertGreaterEqual(query5.count(exact=False), 1)
    self.assertGreaterEqual(query5.count(exact=True), 1)
    # This query applies a selection that yields no results, fully in the
    # database.  Explaining why it fails involves traversing the relation
    # tree and running a LIMIT 1 query at each level that has the potential
    # to remove rows.
    query6 = registry.queryDimensionRecords(
        "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
    )
    self.assertEqual(query6.count(exact=True), 0)
    self.assertFalse(query6.any())

3282 

def testQueryDataIdsExpressionError(self):
    """Test error checking of 'where' expressions in queryDataIds."""
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml")
    bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")}
    # Each case is (where expression, extra queryDataIds keyword arguments,
    # message pattern).  Either LookupError or InvalidQueryError is
    # accepted, and each pattern has one alternative per message wording.
    cases = (
        (
            "foo.bar = 12",
            {},
            r"(No dimension element with name 'foo' in 'foo\.bar'\.)|(Unrecognized identifier 'foo.bar')",
        ),
        (
            "timespan.end < :time",
            {"bind": bind},
            "(Dimension element name cannot be inferred in this context.)"
            "|(Unrecognized identifier 'timespan')",
        ),
    )
    for where, extra, pattern in cases:
        with self.assertRaisesRegex((LookupError, InvalidQueryError), pattern):
            list(registry.queryDataIds(["detector"], where=where, **extra))

3303 

def testQueryDataIdsOrderBy(self):
    """Test order_by and limit on result returned by queryDataIds()."""
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml", "spatial.yaml")

    def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
        """Query data IDs for Cam1/SkyMap1 with the given dimensions and
        optional dataset constraint.
        """
        return registry.queryDataIds(
            dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
        )

    # One ordering scenario: comma-separated ORDER BY terms, comma-separated
    # dimension names to extract, the expected tuples of values, and
    # optionally limit() arguments and a dataset constraint.
    Test = namedtuple(
        "testQueryDataIdsOrderByTest",
        ("order_by", "keys", "result", "limit", "datasets", "collections"),
        defaults=(None, None, None),
    )

    # Result shared by every (tract, detector) scenario below.
    tract_detector_pairs = ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4))
    flats_in_r = {"datasets": "flat", "collections": "imported_r"}

    test_data = [
        Test("tract,visit", "tract,visit", ((0, 1), (0, 2), (1, 2))),
        Test("-tract,visit", "tract,visit", ((1, 2), (0, 1), (0, 2))),
        Test("tract,-visit", "tract,visit", ((0, 2), (0, 1), (1, 2))),
        Test("-tract,-visit", "tract,visit", ((1, 2), (0, 2), (0, 1))),
        Test("tract.id,visit.id", "tract,visit", ((0, 1),), limit=(1,)),
        Test("-tract,-visit", "tract,visit", ((1, 2),), limit=(1,)),
        Test("tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 1), (1, 2))),
        Test("-tract,-visit.exposure_time", "tract,visit", ((1, 2), (0, 1), (0, 2))),
        Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 2), (1, 2))),
        Test("tract,visit.name", "tract,visit", ((0, 1), (0, 2), (1, 2))),
        Test(
            "tract,-visit.timespan.begin,visit.timespan.end",
            "tract,visit",
            ((0, 2), (0, 1), (1, 2)),
        ),
        Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
        Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
        Test("tract,detector", "tract,detector", tract_detector_pairs, **flats_in_r),
        Test("tract,detector.full_name", "tract,detector", tract_detector_pairs, **flats_in_r),
        Test(
            "tract,detector.raft,detector.name_in_raft",
            "tract,detector",
            tract_detector_pairs,
            **flats_in_r,
        ),
    ]

    for test in test_data:
        with self.subTest(test=repr(test)):
            sort_terms = test.order_by.split(",")
            keys = test.keys.split(",")

            def ordered(results):
                """Apply this scenario's ordering and optional limit."""
                results = results.order_by(*sort_terms)
                if test.limit is not None:
                    results = results.limit(*test.limit)
                return results

            rows = tuple(
                tuple(data_id[key] for key in keys)
                for data_id in ordered(do_query(keys, test.datasets, test.collections))
            )
            self.assertEqual(rows, test.result)

            # and materialize
            # Only the query object is built here, without the dataset
            # constraint; nothing is asserted on it.
            ordered(do_query(keys))

    # Test exceptions for errors in a name.
    # Each entry is (positional arguments for do_query, ORDER BY terms to
    # try one at a time, message pattern).  Either ValueError or
    # InvalidQueryError is accepted, and most patterns carry one
    # alternative per message wording.
    error_cases = (
        ((), ("", "-"), "Empty dimension name in ORDER BY"),
        (
            (),
            ("undimension.name", "-undimension.name"),
            "(Unknown dimension element 'undimension')|(Unrecognized identifier 'undimension.name')",
        ),
        (
            (),
            ("attract", "-attract"),
            "(Metadata 'attract' cannot be found in any dimension)|(Unrecognized identifier 'attract')",
        ),
        (
            (("exposure", "visit"),),
            ("exposure_time",),
            "(Metadata 'exposure_time' exists in more than one dimension)"
            "|(Ambiguous identifier 'exposure_time' matches multiple fields)",
        ),
        (
            (("exposure", "visit"),),
            ("timespan.begin",),
            r"(Timespan exists in more than one dimension element \(day_obs, exposure, visit\); "
            r"qualify timespan with specific dimension name\.)|"
            r"(Ambiguous identifier 'timespan' matches multiple fields)",
        ),
        (
            ("tract",),
            ("timespan.begin",),
            "(Cannot find any temporal dimension element for 'timespan.begin')"
            "|(Unrecognized identifier 'timespan')",
        ),
        (
            ("tract",),
            ("tract.timespan.begin",),
            "(Cannot use 'timespan.begin' with non-temporal element)"
            "|(Unrecognized field 'timespan' for tract)",
        ),
        (
            ("tract",),
            ("tract.name",),
            "(Field 'name' does not exist in 'tract')|(Unrecognized field 'name' for tract.)",
        ),
        (
            ("visit",),
            ("timestamp.begin",),
            r"(Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?)"
            r"|(Unrecognized identifier 'timestamp.begin')",
        ),
    )
    for query_args, bad_terms, pattern in error_cases:
        for order_by in bad_terms:
            with self.assertRaisesRegex((ValueError, InvalidQueryError), pattern):
                list(do_query(*query_args).order_by(order_by))

3440 

def testQueryDataIdsGovernorExceptions(self):
    """Test exceptions raised by queryDataIds() for incorrect governors."""
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml", "spatial.yaml")

    def do_query(dimensions, dataId=None, where="", bind=None, **kwargs):
        """Forward all constraints to ``registry.queryDataIds``."""
        return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)

    # One scenario: comma-separated dimensions, the constraints to apply,
    # and either the exception expected or the expected number of rows.
    # The ``{}`` default for ``kwargs`` is shared between instances; it is
    # only ever unpacked, never modified.
    Test = namedtuple(
        "testQueryDataIdExceptionsTest",
        ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
        defaults=(None, None, None, {}, None, 0),
    )

    test_data = (
        Test("tract,visit", count=3),
        Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=3),
        Test("tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, count=0),
        Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=3),
        Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, count=0),
        Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=3),
        Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", count=0),
        Test(
            "tract,visit",
            where="instrument=:cam AND skymap=:map",
            bind={"cam": "Cam1", "map": "SkyMap1"},
            count=3,
        ),
        Test(
            "tract,visit",
            where="instrument=:cam AND skymap=:map",
            bind={"cam": "Cam", "map": "SkyMap"},
            count=0,
        ),
    )

    for test in test_data:
        # Label each scenario so a failure identifies it and the remaining
        # scenarios still run.
        with self.subTest(test=repr(test)):
            dimensions = test.dimensions.split(",")
            if test.exception:
                with self.assertRaises(test.exception):
                    with ExitStack() as stack:
                        # A FutureWarning is additionally required when
                        # the expected exception is DataIdValueError.
                        if test.exception == DataIdValueError:
                            stack.enter_context(self.assertWarns(FutureWarning))
                        do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
            else:
                query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
                self.assertEqual(query.count(discard=True), test.count)

            # and materialize
            if test.exception:
                with self.assertRaises(test.exception):
                    with ExitStack() as stack:
                        if test.exception == DataIdValueError:
                            stack.enter_context(self.assertWarns(FutureWarning))
                        query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
                        with query.materialize() as materialized:
                            materialized.count(discard=True)
            else:
                query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
                with query.materialize() as materialized:
                    self.assertEqual(materialized.count(discard=True), test.count)

3505 

def testQueryDimensionRecordsOrderBy(self):
    """Test order_by and limit on result returned by
    queryDimensionRecords().
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml", "spatial.yaml")

    def do_query(element, datasets=None, collections=None):
        """Query records of ``element`` for instrument Cam1."""
        return registry.queryDimensionRecords(
            element, instrument="Cam1", datasets=datasets, collections=collections
        )

    # Sanity check on the unordered query before testing any ordering.
    self.assertEqual(len(list(do_query("detector"))), 4)

    # One ordering scenario: the element to query, comma-separated ORDER BY
    # terms, the expected record IDs in order, and optional limit()
    # arguments.  The dataset fields are not used by the scenarios below.
    Test = namedtuple(
        "testQueryDataIdsOrderByTest",
        ("element", "order_by", "result", "limit", "datasets", "collections"),
        defaults=(None, None, None),
    )

    test_data = [
        Test("detector", "detector", (1, 2, 3, 4)),
        Test("detector", "-detector", (4, 3, 2, 1)),
        Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
        Test("detector", "-detector.purpose", (4,), limit=(1,)),
        Test("visit", "visit", (1, 2)),
        Test("visit", "-visit.id", (2, 1)),
        Test("visit", "zenith_angle", (1, 2)),
        Test("visit", "-visit.name", (2, 1)),
        Test("visit", "day_obs,-visit.timespan.begin", (2, 1)),
    ]

    for test in test_data:
        records = do_query(test.element).order_by(*test.order_by.split(","))
        if test.limit is not None:
            records = records.limit(*test.limit)
        record_ids = tuple(record.id for record in records)
        self.assertEqual(record_ids, test.result)

    # errors in a name
    # Each entry is (element to query, ORDER BY terms to try one at a time,
    # message pattern).  Either ValueError or InvalidQueryError is
    # accepted, and each pattern has one alternative per message wording.
    error_cases = (
        (
            "detector",
            ("", "-"),
            "(Empty dimension name in ORDER BY)|(Unrecognized identifier)",
        ),
        (
            "detector",
            ("undimension.name", "-undimension.name"),
            "(Element name mismatch: 'undimension')|(Unrecognized identifier)",
        ),
        (
            "detector",
            ("attract", "-attract"),
            "(Field 'attract' does not exist in 'detector'.)|(Unrecognized identifier)",
        ),
        (
            "visit",
            ("timestamp.begin", "-timestamp.begin"),
            r"(Element name mismatch: 'timestamp' instead of 'visit'; "
            r"perhaps you meant 'timespan.begin'\?)"
            r"|(Unrecognized identifier)",
        ),
    )
    for element, bad_terms, pattern in error_cases:
        for order_by in bad_terms:
            with self.assertRaisesRegex((ValueError, InvalidQueryError), pattern):
                list(do_query(element).order_by(order_by))

3581 

def testQueryDimensionRecordsExceptions(self):
    """Test exceptions raised by queryDimensionRecords()."""
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml", "spatial.yaml")

    # Different ways of spelling the instrument constraint (or of leaving
    # it out); each must yield all four detector records.
    constraints = [
        {},
        {"instrument": "Cam1"},
        {"dataId": {"instrument": "Cam1"}},
        # Values specified in kwargs override those specified in dataId.
        {"dataId": {"instrument": "NotCam1"}, "instrument": "Cam1"},
        {"where": "instrument='Cam1'"},
    ]
    for kwargs in constraints:
        result = registry.queryDimensionRecords("detector", **kwargs)
        self.assertEqual(result.count(), 4)

    # The same constraint with its value supplied through ``bind``; this
    # one also checks any().
    bound = registry.queryDimensionRecords("detector", where="instrument=:instr", bind={"instr": "Cam1"})
    self.assertTrue(bound.any())
    self.assertEqual(bound.count(), 4)

3607 

def testDatasetConstrainedDimensionRecordQueries(self):
    """Check that queryDimensionRecords copes with a dataset constraint
    whose dimensions extend beyond those of the requested dimension
    element.
    """
    test_butler = self.make_butler()
    reg = test_butler.registry
    self.load_data(test_butler, "base.yaml", "datasets.yaml")
    # Ask for physical_filter records, constrained by the "flat" datasets
    # found in a single collection.
    filter_records = reg.queryDimensionRecords(
        "physical_filter",
        datasets=["flat"],
        collections="imported_r",
    )
    found_names = set()
    for filter_record in filter_records:
        found_names.add(filter_record.name)
    self.assertEqual(found_names, {"Cam1-R1", "Cam1-R2"})
    # Constraining by every dataset type (``...``) must raise TypeError.
    with self.assertRaises(TypeError):
        wildcard_query = reg.queryDimensionRecords(
            "physical_filter", datasets=..., collections="imported_r"
        )
        list(wildcard_query)

3627 

def testSkyPixDatasetQueries(self):
    """Check that queries over skypix dimensions can be built as long as a
    dataset type using those dimensions is part of the query.
    """
    test_butler = self.make_butler()
    reg = test_butler.registry
    self.load_data(test_butler, "base.yaml")
    skypix_type = DatasetType(
        "a", dimensions=["htm7", "instrument"], universe=reg.dimensions, storageClass="int"
    )
    reg.registerDatasetType(skypix_type)
    run_name = "r"
    reg.registerRun(run_name)

    # With no datasets present the queries must still build and execute
    # without raising, even when "doomed" query shortcuts are in play.
    no_data_ids = list(
        reg.queryDataIds(["htm7", "instrument"], datasets=skypix_type, collections=run_name)
    )
    self.assertFalse(no_data_ids)
    no_refs = list(reg.queryDatasets(skypix_type, collections=run_name))
    self.assertFalse(no_refs)

    # Insert one dataset and make sure both query flavours return it.
    pixelization = reg.dimensions.skypix["htm"][7].pixelization
    # Use the first entry of the first element returned by universe().
    first_pixel = pixelization.universe()[0][0]
    expanded_id = reg.expandDataId(instrument="Cam1", htm7=first_pixel)
    [inserted_ref] = reg.insertDatasets(skypix_type, [expanded_id], run=run_name)
    found_ids = set(
        reg.queryDataIds(["htm7", "instrument"], datasets=skypix_type, collections=run_name)
    )
    self.assertEqual(found_ids, {expanded_id})
    found_refs = set(reg.queryDatasets(skypix_type, collections=run_name))
    self.assertEqual(found_refs, {inserted_ref})

3657 

def testDatasetIdFactory(self):
    """Smoke-test DatasetIdFactory, mainly so that changes to its API are
    noticed.
    """
    test_butler = self.make_butler()
    reg = test_butler.registry
    id_factory = DatasetIdFactory()
    ds_type = DatasetType(
        "datasetType",
        dimensions=["detector", "instrument"],
        universe=reg.dimensions,
        storageClass="int",
    )
    run_name = "run"
    coordinate = DataCoordinate.standardize(
        instrument="Cam1", detector=1, dimensions=ds_type.dimensions
    )

    # Each ID generation mode paired with the UUID version it must yield.
    expected_versions = (
        (DatasetIdGenEnum.UNIQUE, 7),
        (DatasetIdGenEnum.DATAID_TYPE, 5),
        (DatasetIdGenEnum.DATAID_TYPE_RUN, 5),
    )
    for gen_mode, uuid_version in expected_versions:
        generated = id_factory.makeDatasetId(run_name, ds_type, coordinate, gen_mode)
        self.assertIsInstance(generated, uuid.UUID)
        self.assertEqual(generated.version, uuid_version)

3687 

def testExposureQueries(self):
    """Run query methods with arguments modelled on those coming from the
    exposure log service.

    The most complete test dataset currently available to daf_butler tests
    is the ci_hsc-subset.yaml export, but it has no 'exposure' dimension
    records.  Queries that originally used the exposure dimension are
    therefore expressed with the (very similar) visit dimension here.
    """
    test_butler = self.make_butler()
    reg = test_butler.registry
    self.load_data(test_butler, "ci_hsc-subset.yaml")

    # First five visits when ordered by visit ID.
    first_visits = [903334, 903336, 903338, 903342, 903344]
    visit_records = reg.queryDimensionRecords("visit", instrument="HSC").order_by("visit").limit(5)
    self.assertEqual([visit_record.id for visit_record in visit_records], first_visits)
    visit_ids = reg.queryDataIds(["visit"], instrument="HSC").order_by("visit").limit(5)
    self.assertEqual([visit_id["visit"] for visit_id in visit_ids], first_visits)

    # First five detectors when ordered by full_name.
    first_detectors = [25, 24, 23, 22, 18]
    detector_records = (
        reg.queryDimensionRecords("detector", instrument="HSC").order_by("full_name").limit(5)
    )
    self.assertEqual(
        [detector_record.id for detector_record in detector_records], first_detectors
    )
    detector_ids = reg.queryDataIds(["detector"], instrument="HSC").order_by("full_name").limit(5)
    self.assertEqual([detector_id["detector"] for detector_id in detector_ids], first_detectors)

3735 

def test_long_query_names(self) -> None:
    """Check that queries involving a very long dataset type name work.

    This matters most for PostgreSQL, which truncates symbols longer than
    64 chars, but the check is worthwhile for every database.
    """
    test_butler = self.make_butler()
    reg = test_butler.registry
    long_name = "abcd" * 17  # 68 characters
    long_type = DatasetType(
        long_name,
        dimensions=(),
        storageClass="Exposure",
        universe=reg.dimensions,
    )
    reg.registerDatasetType(long_type)
    # More than one searched collection has to actually contain a matching
    # dataset; otherwise optimizations turn findFirst=True into a no-op and
    # sidestep any truncation bug.
    first_run, second_run = "run1", "run2"
    reg.registerRun(first_run)
    reg.registerRun(second_run)
    [first_ref] = reg.insertDatasets(
        long_name, [DataCoordinate.make_empty(reg.dimensions)], first_run
    )
    reg.insertDatasets(long_name, [DataCoordinate.make_empty(reg.dimensions)], second_run)
    found_refs = set(
        reg.queryDatasets(long_name, collections=[first_run, second_run], findFirst=True)
    )
    self.assertEqual(found_refs, {first_ref})

3766 

def test_skypix_constraint_queries(self) -> None:
    """Check queries that are spatially constrained by a skypix data ID."""
    test_butler = self.make_butler()
    reg = test_butler.registry
    self.load_data(test_butler, "base.yaml", "spatial.yaml")
    # Region of every patch, keyed by (tract, patch).
    regions_by_patch = {}
    for patch_id in reg.queryDataIds(["patch"]).expanded():
        regions_by_patch[(patch_id["tract"], patch_id["patch"])] = patch_id.region
    skypix_dimension: SkyPixDimension = reg.dimensions["htm11"]
    # Guard against the test becoming trivial after a config change; if
    # this fails, just pick a different HTM level.
    self.assertNotEqual(skypix_dimension, reg.dimensions.commonSkyPix)
    pixelization = skypix_dimension.pixelization
    # Collect the skypix IDs reported as interior to at least one patch.
    interior_ids = lsst.sphgeom.RangeSet()
    for region in regions_by_patch.values():
        interior_ids |= pixelization.interior(region)
    # Search for a "nontrivial" skypix ID: one overlapping at least one
    # patch while missing at least one other patch.
    candidate_ids = (
        candidate for begin, end in interior_ids for candidate in range(begin, end)
    )
    for skypix_id in candidate_ids:
        pixel_region = pixelization.pixel(skypix_id)
        touched_patches = {
            key
            for key, region in regions_by_patch.items()
            if not region.isDisjointFrom(pixel_region)
        }
        if touched_patches and touched_patches != regions_by_patch.keys():
            break
    else:
        raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
    # A three-way join that includes the htm7 skypix system alongside tract
    # and visit is expected to be rejected with InvalidQueryError.
    with self.assertRaises(InvalidQueryError):
        three_way_query = reg.queryDataIds(
            ["tract", "visit", "htm7"], skymap="SkyMap1", instrument="Cam1"
        )
        set(three_way_query.expanded())

3808 

def test_spatial_constraint_queries(self) -> None:
    """Test queries in which one spatial dimension in the constraint (data
    ID or ``where`` string) constrains a different spatial dimension in the
    query result columns.

    The expected overlaps are computed directly from the regions attached
    to expanded data IDs and then compared with what the registry returns.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "spatial.yaml")
    # Regions keyed by (tract, patch).
    patch_regions = {
        (data_id["tract"], data_id["patch"]): data_id.region
        for data_id in registry.queryDataIds(["patch"]).expanded()
    }
    # Regions keyed by (visit, detector).
    observation_regions = {
        (data_id["visit"], data_id["detector"]): data_id.region
        for data_id in registry.queryDataIds(["visit", "detector"]).expanded()
    }
    all_combos = set(itertools.product(patch_regions, observation_regions))
    overlapping_combos = {
        (patch_key, observation_key)
        for patch_key, observation_key in all_combos
        if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key])
    }
    # Check a direct spatial join with no constraint first.
    self.assertEqual(
        {
            ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"]))
            for data_id in registry.queryDataIds(["patch", "visit", "detector"])
        },
        overlapping_combos,
    )
    # Index the overlaps both ways; keys and set members are integer pairs.
    overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set)
    overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set)
    for patch_key, observation_key in overlapping_combos:
        overlaps_by_patch[patch_key].add(observation_key)
        overlaps_by_observation[observation_key].add(patch_key)
    # Find patches and observations that overlap at least one of the other
    # but not all of the other.
    nontrivial_patch = next(
        patch_key
        for patch_key, observation_keys in overlaps_by_patch.items()
        if observation_keys and observation_keys != observation_regions.keys()
    )
    nontrivial_observation = next(
        observation_key
        for observation_key, patch_keys in overlaps_by_observation.items()
        if patch_keys and patch_keys != patch_regions.keys()
    )
    # Use the nontrivial patches and observations as constraints on the
    # other dimensions in various ways, first via a 'where' expression.
    # It's better in general to use 'bind' instead of f-strings, but these
    # are all integers so there are no quoting concerns.
    self.assertEqual(
        {
            (data_id["visit"], data_id["detector"])
            for data_id in registry.queryDataIds(
                ["visit", "detector"],
                where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}",
                skymap="SkyMap1",
            )
        },
        overlaps_by_patch[nontrivial_patch],
    )
    self.assertEqual(
        {
            (data_id["tract"], data_id["patch"])
            for data_id in registry.queryDataIds(
                ["patch"],
                where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}",
                instrument="Cam1",
            )
        },
        overlaps_by_observation[nontrivial_observation],
    )
    # and then via the dataId argument.
    self.assertEqual(
        {
            (data_id["visit"], data_id["detector"])
            for data_id in registry.queryDataIds(
                ["visit", "detector"],
                dataId={
                    "tract": nontrivial_patch[0],
                    "patch": nontrivial_patch[1],
                },
                skymap="SkyMap1",
            )
        },
        overlaps_by_patch[nontrivial_patch],
    )
    self.assertEqual(
        {
            (data_id["tract"], data_id["patch"])
            for data_id in registry.queryDataIds(
                ["patch"],
                dataId={
                    "visit": nontrivial_observation[0],
                    "detector": nontrivial_observation[1],
                },
                instrument="Cam1",
            )
        },
        overlaps_by_observation[nontrivial_observation],
    )

3918 

def test_query_empty_collections(self) -> None:
    """Check registry query methods when given an empty collection list.

    Each method should produce an empty result (or None where applicable)
    together with "doomed" diagnostics explaining why.
    """
    test_butler = self.make_butler()
    reg = test_butler.registry
    self.load_data(test_butler, "base.yaml", "datasets.yaml")

    def assert_doomed(empty_result) -> None:
        # The result must be empty and must explain that the collection
        # list was empty.
        self.assertEqual(len(list(empty_result)), 0)
        reasons = list(empty_result.explain_no_results())
        self.assertTrue(reasons)
        self.assertTrue(any("because collection list is empty" in reason for reason in reasons))

    # registry.findDataset(): no collections argument, wildcard, empty.
    bias_id = {"instrument": "Cam1", "detector": 1}
    with self.assertRaises(NoDefaultCollectionError):
        reg.findDataset("bias", **bias_id)
    self.assertIsNotNone(reg.findDataset("bias", collections=..., **bias_id))
    self.assertIsNone(reg.findDataset("bias", collections=[], **bias_id))

    # registry.queryDatasets()
    with self.assertRaises(NoDefaultCollectionError):
        reg.queryDatasets("bias")
    self.assertTrue(list(reg.queryDatasets("bias", collections=...)))
    assert_doomed(reg.queryDatasets("bias", collections=[]))

    # registry.queryDataIds()
    with self.assertRaises(NoDefaultCollectionError):
        reg.queryDataIds("detector", datasets="bias")
    self.assertTrue(list(reg.queryDataIds("detector", datasets="bias", collections=...)))
    assert_doomed(reg.queryDataIds("detector", datasets="bias", collections=[]))

    # registry.queryDimensionRecords()
    with self.assertRaises(NoDefaultCollectionError):
        reg.queryDimensionRecords("detector", datasets="bias")
    self.assertTrue(list(reg.queryDimensionRecords("detector", datasets="bias", collections=...)))
    assert_doomed(reg.queryDimensionRecords("detector", datasets="bias", collections=[]))

3966 

def test_dataset_followup_spatial_joins(self) -> None:
    """Check queryDataIds(...).findRelatedDatasets(...) when relating the
    data IDs to the datasets requires a spatial join.
    """
    test_butler = self.make_butler()
    reg = test_butler.registry
    self.load_data(test_butler, "base.yaml", "spatial.yaml")
    pvi_type = DatasetType(
        "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=reg.dimensions
    )
    reg.registerDatasetType(pvi_type)
    run_name = "datasets"
    reg.registerRun(run_name)
    # One "pvi" dataset per detector of visit 1, inserted one at a time.
    pvi_by_detector = {}
    for detector in (1, 2, 3):
        (pvi_by_detector[detector],) = reg.insertDatasets(
            pvi_type, [{"instrument": "Cam1", "visit": 1, "detector": detector}], run=run_name
        )
    patch_query = reg.queryDataIds(["patch"], skymap="SkyMap1", tract=0).expanded()
    related = set(patch_query.findRelatedDatasets("pvi", [run_name]))
    # Expected (patch, detector) pairings for tract 0.
    expected_pairs = ((0, 1), (0, 2), (1, 2), (2, 1), (2, 2), (2, 3), (3, 2), (4, 3))
    expected = {
        (reg.expandDataId(skymap="SkyMap1", tract=0, patch=patch), pvi_by_detector[det])
        for patch, det in expected_pairs
    }
    self.assertEqual(related, expected)

4006 

def test_expanded_data_id_queries(self) -> None:
    """Check the basic behaviour of ``expanded()`` on the results of
    queryDataIds and queryDatasets.
    """
    test_butler = self.make_butler()
    reg = test_butler.registry
    self.load_data(test_butler, "base.yaml", "spatial.yaml", "datasets.yaml")

    visit_query = reg.queryDataIds(
        ["visit"], where="instrument = 'Cam1' and (visit.id = 1 or visit.id = 2)"
    )
    visit_query = visit_query.expanded().order_by("visit.id")
    self.assertTrue(visit_query.hasRecords())
    visits = list(visit_query)
    self.assertEqual(len(visits), 2)
    first_visit, second_visit = visits

    self.assertEqual(first_visit["visit"], 1)
    self.assertEqual(second_visit["visit"], 2)
    self.assertEqual(first_visit.records["visit"].exposure_time, 60.0)
    self.assertEqual(second_visit.records["visit"].exposure_time, 45.0)
    # physical_filter is a "cacheable" dimension: its records come from
    # the local cache instead of being part of the DB rows.
    self.assertEqual(first_visit.records["physical_filter"].band, "g")
    self.assertEqual(second_visit.records["physical_filter"].band, "r")

    # A null field in a dimension record must be fetched as None.
    null_purpose_detector = {
        "instrument": "Cam1",
        "id": 5,
        "raft": "Z",
        "name_in_raft": "z",
        "full_name": "Zz",
        "purpose": None,
    }
    reg.insertDimensionData("detector", null_purpose_detector)
    detector_ids = list(
        reg.queryDataIds("detector", dataId={"instrument": "Cam1", "detector": 5}).expanded()
    )
    self.assertIsNone(detector_ids[0].records["detector"].purpose)

    flat_query = reg.queryDatasets(
        "flat", collections="imported_g", where="instrument = 'Cam1' and detector <= 3"
    ).expanded()
    flat_refs = sorted(flat_query, key=lambda flat_ref: flat_ref.dataId["detector"])
    self.assertEqual(len(flat_refs), 2)
    self.assertEqual(flat_refs[0].id, uuid.UUID("60c8a65c-7290-4c38-b1de-e3b1cdcf872d"))
    self.assertEqual(flat_refs[1].id, uuid.UUID("84239e7f-c41f-46d5-97b9-a27976b98ceb"))
    # Every dimension of "flat" is a "cached" dimension.
    self.assertEqual(flat_refs[0].dataId.records["detector"].full_name, "Ab")
    self.assertEqual(flat_refs[1].dataId.records["detector"].full_name, "Ba")
    self.assertEqual(flat_refs[0].dataId.records["instrument"].visit_system, 1)
    assert isinstance(flat_query, ParentDatasetQueryResults)
    flat_data_ids = sorted(flat_query.dataIds, key=lambda flat_id: flat_id["detector"])
    self.assertEqual(len(flat_data_ids), 2)
    self.assertEqual(flat_data_ids[0].records["detector"].full_name, "Ab")
    self.assertEqual(flat_data_ids[1].records["detector"].full_name, "Ba")
    self.assertEqual(flat_data_ids[0].records["instrument"].visit_system, 1)

    # The test data has no dataset with an uncached dimension, so register
    # and insert one that uses "visit".
    visit_type = DatasetType("test", ["visit"], "int", universe=reg.dimensions)
    reg.registerDatasetType(visit_type)
    reg.insertDatasets("test", [{"instrument": "Cam1", "visit": 1}], run="imported_g")
    visit_ref = list(reg.queryDatasets("test", collections="imported_g").expanded())[0]
    visit_data_id = visit_ref.dataId
    self.assertEqual(visit_data_id.records["visit"].zenith_angle, 5.0)
    self.assertEqual(visit_data_id.records["physical_filter"].band, "g")
    actual_timespan = visit_data_id.timespan
    expected_timespan = Timespan(
        begin=astropy.time.Time("2021-09-09 03:00:00.000000000", scale="tai"),
        end=astropy.time.Time("2021-09-09 03:01:00.000000000", scale="tai"),
    )
    self.assertEqual(actual_timespan, expected_timespan)

4084 

def test_collection_summary(self) -> None:
    """Exercise the collection summary methods of the registry."""
    test_butler = self.make_butler()
    reg = test_butler.registry
    self.load_data(test_butler, "base.yaml", "datasets.yaml", "spatial.yaml")

    def assert_summary(collection, dataset_type_names, governors) -> None:
        # Fetch a fresh summary and compare both of its parts.
        fetched = reg.getCollectionSummary(collection)
        self.assertEqual(fetched.dataset_types.names, dataset_type_names)
        self.assertEqual(fetched.governors, governors)

    # Register one extra dataset type; its mere existence is what triggers
    # a bug in `associate` (DM-44311).
    extra_type = DatasetType("test", ["tract", "patch"], "int", universe=reg.dimensions)
    reg.registerDatasetType(extra_type)

    # Summary of what the import produced.
    assert_summary("imported_g", {"bias", "flat"}, {"instrument": {"Cam1"}})

    # Create a tagged collection and associate the imported datasets.
    tagged_name = "tagged"
    reg.registerCollection(tagged_name, CollectionType.TAGGED)
    imported_by_type = reg.queryDatasets(..., collections=["imported_g"]).byParentDatasetType()
    for type_refs in imported_by_type:
        reg.associate(tagged_name, type_refs)

    # The tagged collection must report the same dataset types.
    assert_summary(tagged_name, {"bias", "flat"}, {"instrument": {"Cam1"}})

    # Take every dataset out of the tagged collection again.
    tagged_refs = list(reg.queryDatasets(..., collections=[tagged_name]))
    reg.disassociate(tagged_name, tagged_refs)

    # Disassociation alone leaves the summary untouched.
    assert_summary(tagged_name, {"bias", "flat"}, {"instrument": {"Cam1"}})

    # Explicitly clean up the summaries.
    reg.refresh_collection_summaries()
    cleaned = reg.getCollectionSummary(tagged_name)
    self.assertFalse(cleaned.dataset_types.names)
    # Governor summaries are not cleaned yet, but because of how the query
    # is run it returns empty governors when the collection is missing
    # from the summaries.
    self.assertFalse(cleaned.governors)

    # Associate a dataset with a different governor to show that the
    # governors were not actually cleaned.
    skymap_refs = reg.insertDatasets(
        "test", [{"skymap": "SkyMap1", "tract": 0, "patch": 0}], "imported_g"
    )
    reg.associate(tagged_name, skymap_refs)
    # The instrument governor resurrects here even though no dataset with
    # that governor is left in the collection.
    assert_summary(tagged_name, {"test"}, {"instrument": {"Cam1"}, "skymap": {"SkyMap1"}})

4140 

def test_temp_table_config(self) -> None:
    """Check that setting ``temporary_tables`` to False in the registry
    config is reflected by the database object and that asking for a
    temporary table then raises ReadOnlyDatabaseError.
    """
    registry_config = self.makeRegistryConfig()
    registry_config["temporary_tables"] = False
    self.assertEqual(registry_config.areTemporaryTablesAllowed, False)
    butler = self.make_butler(registry_config)
    # The private database handle is only reachable on a DirectButler.
    if not isinstance(butler, DirectButler):
        raise unittest.SkipTest("Test only makes sense for registry with direct database connection.")
    database = butler._registry._db
    self.assertEqual(database.supports_temporary_tables, False)
    with self.assertRaisesRegex(ReadOnlyDatabaseError, "temporary tables"), database.temporary_table(...):
        pass