Coverage for python/lsst/daf/butler/registries/sql.py: 16%

515 statements  

coverage.py v7.2.7, created at 2023-08-05 01:26 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("SqlRegistry",) 

25 

26import contextlib 

27import logging 

28import warnings 

29from collections.abc import Iterable, Iterator, Mapping, Sequence 

30from typing import TYPE_CHECKING, Any, Literal, cast 

31 

32import sqlalchemy 

33from lsst.daf.relation import LeafRelation, Relation 

34from lsst.resources import ResourcePathExpression 

35from lsst.utils.introspection import find_outside_stacklevel 

36from lsst.utils.iteration import ensure_iterable 

37 

38from ..core import ( 

39 Config, 

40 DataCoordinate, 

41 DataId, 

42 DatasetAssociation, 

43 DatasetColumnTag, 

44 DatasetId, 

45 DatasetIdGenEnum, 

46 DatasetRef, 

47 DatasetType, 

48 Dimension, 

49 DimensionConfig, 

50 DimensionElement, 

51 DimensionGraph, 

52 DimensionRecord, 

53 DimensionUniverse, 

54 NamedKeyMapping, 

55 NameLookupMapping, 

56 Progress, 

57 StorageClassFactory, 

58 Timespan, 

59 ddl, 

60) 

61from ..core.utils import transactional 

62from ..registry import ( 

63 ArgumentError, 

64 CollectionExpressionError, 

65 CollectionSummary, 

66 CollectionType, 

67 CollectionTypeError, 

68 ConflictingDefinitionError, 

69 DataIdValueError, 

70 DatasetTypeError, 

71 DimensionNameError, 

72 InconsistentDataIdError, 

73 NoDefaultCollectionError, 

74 OrphanedRecordError, 

75 RegistryConfig, 

76 RegistryConsistencyError, 

77 RegistryDefaults, 

78 _ButlerRegistry, 

79 queries, 

80) 

81from ..registry.interfaces import ChainedCollectionRecord, RunRecord 

82from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes 

83from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard 

84 

85if TYPE_CHECKING: 

86 from .._butlerConfig import ButlerConfig 

87 from ..registry._registry import CollectionArgType 

88 from ..registry.interfaces import ( 

89 CollectionRecord, 

90 Database, 

91 DatastoreRegistryBridgeManager, 

92 ObsCoreTableManager, 

93 ) 

94 

95 

96_LOG = logging.getLogger(__name__) 

97 

98 

99class SqlRegistry(_ButlerRegistry): 

100 """Registry implementation based on SQLAlchemy. 

101 

102 Parameters 

103 ---------- 

104 database : `Database` 

105 Database instance used to store the Registry.

106 defaults : `RegistryDefaults` 

107 Default collection search path and/or output `~CollectionType.RUN` 

108 collection. 

109 managers : `RegistryManagerInstances` 

110 All the managers required for this registry. 

111 """ 

112 

113 defaultConfigFile: str | None = None 

114 """Path to configuration defaults. Accessed within the ``configs`` resource 

115 or relative to a search path. Can be None if no defaults specified. 

116 """ 

117 

118 @classmethod 

119 def createFromConfig( 

120 cls, 

121 config: RegistryConfig | str | None = None, 

122 dimensionConfig: DimensionConfig | str | None = None, 

123 butlerRoot: ResourcePathExpression | None = None, 

124 ) -> _ButlerRegistry: 

125 """Create registry database and return `SqlRegistry` instance. 

126 

127 This method initializes database contents, database must be empty 

128 prior to calling this method. 

129 

130 Parameters 

131 ---------- 

132 config : `RegistryConfig` or `str`, optional 

133 Registry configuration, if missing then default configuration will 

134 be loaded from registry.yaml. 

135 dimensionConfig : `DimensionConfig` or `str`, optional 

136 Dimensions configuration, if missing then default configuration 

137 will be loaded from dimensions.yaml. 

138 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

139 Path to the repository root this `SqlRegistry` will manage. 

140 

141 Returns 

142 ------- 

143 registry : `SqlRegistry` 

144 A new `SqlRegistry` instance. 

145 """ 

146 config = cls.forceRegistryConfig(config) 

147 config.replaceRoot(butlerRoot) 

148 

149 if isinstance(dimensionConfig, str): 

150 dimensionConfig = DimensionConfig(dimensionConfig) 

151 elif dimensionConfig is None: 

152 dimensionConfig = DimensionConfig() 

153 elif not isinstance(dimensionConfig, DimensionConfig): 

154 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

155 

156 DatabaseClass = config.getDatabaseClass() 

157 database = DatabaseClass.fromUri( 

158 config.connectionString, origin=config.get("origin", 0), namespace=config.get("namespace") 

159 ) 

160 managerTypes = RegistryManagerTypes.fromConfig(config) 

161 managers = managerTypes.makeRepo(database, dimensionConfig) 

162 return cls(database, RegistryDefaults(), managers) 

163 
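# Example (illustrative sketch, not part of sql.py): creating a brand-new, empty
# registry database with createFromConfig. The SQLite URL and repository path are
# hypothetical placeholders; omitting dimensionConfig uses the packaged
# dimensions.yaml defaults.
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import RegistryConfig

config = RegistryConfig()                             # packaged registry.yaml defaults
config["db"] = "sqlite:///example_repo/gen3.sqlite3"  # assumed connection string
registry = SqlRegistry.createFromConfig(config, butlerRoot="example_repo")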

164 @classmethod 

165 def fromConfig( 

166 cls, 

167 config: ButlerConfig | RegistryConfig | Config | str, 

168 butlerRoot: ResourcePathExpression | None = None, 

169 writeable: bool = True, 

170 defaults: RegistryDefaults | None = None, 

171 ) -> _ButlerRegistry: 

172 """Create `Registry` subclass instance from `config`. 

173 

174 Registry database must be initialized prior to calling this method. 

175 

176 Parameters 

177 ---------- 

178 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

179 Registry configuration 

180 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

181 Path to the repository root this `Registry` will manage. 

182 writeable : `bool`, optional 

183 If `True` (default) create a read-write connection to the database. 

184 defaults : `RegistryDefaults`, optional 

185 Default collection search path and/or output `~CollectionType.RUN` 

186 collection. 

187 

188 Returns 

189 ------- 

190 registry : `SqlRegistry` (subclass) 

191 A new `SqlRegistry` subclass instance. 

192 """ 

193 config = cls.forceRegistryConfig(config) 

194 config.replaceRoot(butlerRoot) 

195 DatabaseClass = config.getDatabaseClass() 

196 database = DatabaseClass.fromUri( 

197 config.connectionString, 

198 origin=config.get("origin", 0), 

199 namespace=config.get("namespace"), 

200 writeable=writeable, 

201 ) 

202 managerTypes = RegistryManagerTypes.fromConfig(config) 

203 with database.session(): 

204 managers = managerTypes.loadRepo(database) 

205 if defaults is None: 

206 defaults = RegistryDefaults() 

207 return cls(database, defaults, managers) 

208 
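# Example (illustrative sketch, not part of sql.py): opening an existing repository
# read-only via fromConfig, with default collections supplied through
# RegistryDefaults. The config path and collection name are hypothetical.
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults

config = RegistryConfig("example_repo/butler.yaml")   # assumed existing repo config
defaults = RegistryDefaults(collections=["HSC/defaults"])
registry = SqlRegistry.fromConfig(config, writeable=False, defaults=defaults)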

209 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances): 

210 self._db = database 

211 self._managers = managers 

212 self.storageClasses = StorageClassFactory() 

213 # Intentionally invoke property setter to initialize defaults. This 

214 # can only be done after most of the rest of Registry has already been 

215 # initialized, and must be done before the property getter is used. 

216 self.defaults = defaults 

217 

218 def __str__(self) -> str: 

219 return str(self._db) 

220 

221 def __repr__(self) -> str: 

222 return f"SqlRegistry({self._db!r}, {self.dimensions!r})" 

223 

224 def isWriteable(self) -> bool: 

225 # Docstring inherited from lsst.daf.butler.registry.Registry 

226 return self._db.isWriteable() 

227 

228 def copy(self, defaults: RegistryDefaults | None = None) -> _ButlerRegistry: 

229 # Docstring inherited from lsst.daf.butler.registry.Registry 

230 if defaults is None: 

231 # No need to copy, because `RegistryDefaults` is immutable; we 

232 # effectively copy on write. 

233 defaults = self.defaults 

234 return type(self)(self._db, defaults, self._managers) 

235 

236 @property 

237 def dimensions(self) -> DimensionUniverse: 

238 # Docstring inherited from lsst.daf.butler.registry.Registry 

239 return self._managers.dimensions.universe 

240 

241 def refresh(self) -> None: 

242 # Docstring inherited from lsst.daf.butler.registry.Registry 

243 with self._db.transaction(): 

244 self._managers.refresh() 

245 

246 @contextlib.contextmanager 

247 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

248 # Docstring inherited from lsst.daf.butler.registry.Registry 

249 try: 

250 with self._db.transaction(savepoint=savepoint): 

251 yield 

252 except BaseException: 

253 # TODO: this clears the caches sometimes when we wouldn't actually 

254 # need to. Can we avoid that? 

255 self._managers.dimensions.clearCaches() 

256 raise 

257 
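# Example (illustrative sketch, not part of sql.py): grouping several registry
# operations in one transaction so they commit or roll back together. Assumes
# ``registry`` is a writeable SqlRegistry; the collection names are hypothetical.
from lsst.daf.butler.registry import CollectionType

with registry.transaction(savepoint=True):
    registry.registerRun("u/example/run")
    registry.registerCollection("u/example/tagged", CollectionType.TAGGED)
# If either call raises, both registrations are rolled back.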

258 def resetConnectionPool(self) -> None: 

259 """Reset SQLAlchemy connection pool for `SqlRegistry` database. 

260 

261 This operation is useful when using registry with fork-based 

262 multiprocessing. To use registry across fork boundary one has to make 

263 sure that there are no currently active connections (no session or 

264 transaction is in progress) and connection pool is reset using this 

265 method. This method should be called by the child process immediately 

266 after the fork. 

267 """ 

268 self._db._engine.dispose() 

269 
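# Example (illustrative sketch, not part of sql.py): resetting the connection pool
# in a forked worker before it touches the registry, as the docstring above
# requires. Assumes ``registry`` is an already-constructed SqlRegistry with no
# open session or transaction at fork time.
import multiprocessing

def _worker(registry):
    registry.resetConnectionPool()  # must run before any other registry call
    print(list(registry.queryCollections()))

ctx = multiprocessing.get_context("fork")
proc = ctx.Process(target=_worker, args=(registry,))
proc.start()
proc.join()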

270 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

271 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

272 other data repository client. 

273 

274 Opaque table records can be added via `insertOpaqueData`, retrieved via 

275 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

276 

277 Parameters 

278 ---------- 

279 tableName : `str` 

280 Logical name of the opaque table. This may differ from the 

281 actual name used in the database by a prefix and/or suffix. 

282 spec : `ddl.TableSpec` 

283 Specification for the table to be added. 

284 """ 

285 self._managers.opaque.register(tableName, spec) 

286 

287 @transactional 

288 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

289 """Insert records into an opaque table. 

290 

291 Parameters 

292 ---------- 

293 tableName : `str` 

294 Logical name of the opaque table. Must match the name used in a 

295 previous call to `registerOpaqueTable`. 

296 data 

297 Each additional positional argument is a dictionary that represents 

298 a single row to be added. 

299 """ 

300 self._managers.opaque[tableName].insert(*data) 

301 

302 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]: 

303 """Retrieve records from an opaque table. 

304 

305 Parameters 

306 ---------- 

307 tableName : `str` 

308 Logical name of the opaque table. Must match the name used in a 

309 previous call to `registerOpaqueTable`. 

310 where 

311 Additional keyword arguments are interpreted as equality 

312 constraints that restrict the returned rows (combined with AND); 

313 keyword arguments are column names and values are the values they 

314 must have. 

315 

316 Yields 

317 ------ 

318 row : `dict` 

319 A dictionary representing a single result row. 

320 """ 

321 yield from self._managers.opaque[tableName].fetch(**where) 

322 

323 @transactional 

324 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

325 """Remove records from an opaque table. 

326 

327 Parameters 

328 ---------- 

329 tableName : `str` 

330 Logical name of the opaque table. Must match the name used in a 

331 previous call to `registerOpaqueTable`. 

332 where 

333 Additional keyword arguments are interpreted as equality 

334 constraints that restrict the deleted rows (combined with AND); 

335 keyword arguments are column names and values are the values they 

336 must have. 

337 """ 

338 self._managers.opaque[tableName].delete(where.keys(), where) 

339 
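# Example (illustrative sketch, not part of sql.py): the opaque-table round trip
# (register, insert, fetch, delete) that datastores use. The table name, fields,
# and exact ddl.FieldSpec/TableSpec arguments are assumptions for illustration.
import uuid
import sqlalchemy
from lsst.daf.butler.core import ddl

spec = ddl.TableSpec(
    fields=[
        ddl.FieldSpec("dataset_id", dtype=ddl.GUID, primaryKey=True),
        ddl.FieldSpec("path", dtype=sqlalchemy.String, length=256, nullable=False),
    ]
)
registry.registerOpaqueTable("example_datastore_records", spec)
row_id = uuid.uuid4()
registry.insertOpaqueData("example_datastore_records", {"dataset_id": row_id, "path": "a/b.fits"})
rows = list(registry.fetchOpaqueData("example_datastore_records", dataset_id=row_id))
registry.deleteOpaqueData("example_datastore_records", dataset_id=row_id)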

340 def registerCollection( 

341 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None 

342 ) -> bool: 

343 # Docstring inherited from lsst.daf.butler.registry.Registry 

344 _, registered = self._managers.collections.register(name, type, doc=doc) 

345 return registered 

346 

347 def getCollectionType(self, name: str) -> CollectionType: 

348 # Docstring inherited from lsst.daf.butler.registry.Registry 

349 return self._managers.collections.find(name).type 

350 

351 def _get_collection_record(self, name: str) -> CollectionRecord: 

352 # Docstring inherited from lsst.daf.butler.registry.Registry 

353 return self._managers.collections.find(name) 

354 

355 def registerRun(self, name: str, doc: str | None = None) -> bool: 

356 # Docstring inherited from lsst.daf.butler.registry.Registry 

357 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

358 return registered 

359 

360 @transactional 

361 def removeCollection(self, name: str) -> None: 

362 # Docstring inherited from lsst.daf.butler.registry.Registry 

363 self._managers.collections.remove(name) 

364 

365 def getCollectionChain(self, parent: str) -> tuple[str, ...]: 

366 # Docstring inherited from lsst.daf.butler.registry.Registry 

367 record = self._managers.collections.find(parent) 

368 if record.type is not CollectionType.CHAINED: 

369 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

370 assert isinstance(record, ChainedCollectionRecord) 

371 return record.children 

372 

373 @transactional 

374 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

375 # Docstring inherited from lsst.daf.butler.registry.Registry 

376 record = self._managers.collections.find(parent) 

377 if record.type is not CollectionType.CHAINED: 

378 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

379 assert isinstance(record, ChainedCollectionRecord) 

380 children = CollectionWildcard.from_expression(children).require_ordered() 

381 if children != record.children or flatten: 

382 record.update(self._managers.collections, children, flatten=flatten) 

383 

384 def getCollectionParentChains(self, collection: str) -> set[str]: 

385 # Docstring inherited from lsst.daf.butler.registry.Registry 

386 return { 

387 record.name 

388 for record in self._managers.collections.getParentChains( 

389 self._managers.collections.find(collection).key 

390 ) 

391 } 

392 

393 def getCollectionDocumentation(self, collection: str) -> str | None: 

394 # Docstring inherited from lsst.daf.butler.registry.Registry 

395 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

396 

397 def setCollectionDocumentation(self, collection: str, doc: str | None) -> None: 

398 # Docstring inherited from lsst.daf.butler.registry.Registry 

399 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

400 

401 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

402 # Docstring inherited from lsst.daf.butler.registry.Registry 

403 record = self._managers.collections.find(collection) 

404 return self._managers.datasets.getCollectionSummary(record) 

405 

406 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

407 # Docstring inherited from lsst.daf.butler.registry.Registry 

408 _, inserted = self._managers.datasets.register(datasetType) 

409 return inserted 

410 

411 def removeDatasetType(self, name: str | tuple[str, ...]) -> None: 

412 # Docstring inherited from lsst.daf.butler.registry.Registry 

413 

414 for datasetTypeExpression in ensure_iterable(name): 

415 # Catch any warnings from the caller specifying a component 

416 # dataset type. This will result in an error later but the 

417 # warning could be confusing when the caller is not querying 

418 # anything. 

419 with warnings.catch_warnings(): 

420 warnings.simplefilter("ignore", category=FutureWarning) 

421 datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression)) 

422 if not datasetTypes: 

423 _LOG.info("Dataset type %r not defined", datasetTypeExpression) 

424 else: 

425 for datasetType in datasetTypes: 

426 self._managers.datasets.remove(datasetType.name) 

427 _LOG.info("Removed dataset type %r", datasetType.name) 

428 

429 def getDatasetType(self, name: str) -> DatasetType: 

430 # Docstring inherited from lsst.daf.butler.registry.Registry 

431 parent_name, component = DatasetType.splitDatasetTypeName(name) 

432 storage = self._managers.datasets[parent_name] 

433 if component is None: 

434 return storage.datasetType 

435 else: 

436 return storage.datasetType.makeComponentDatasetType(component) 

437 

438 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

439 # Docstring inherited from lsst.daf.butler.registry.Registry 

440 return self._managers.datasets.supportsIdGenerationMode(mode) 

441 

442 def findDataset( 

443 self, 

444 datasetType: DatasetType | str, 

445 dataId: DataId | None = None, 

446 *, 

447 collections: CollectionArgType | None = None, 

448 timespan: Timespan | None = None, 

449 **kwargs: Any, 

450 ) -> DatasetRef | None: 

451 # Docstring inherited from lsst.daf.butler.registry.Registry 

452 if collections is None: 

453 if not self.defaults.collections: 

454 raise NoDefaultCollectionError( 

455 "No collections provided to findDataset, and no defaults from registry construction." 

456 ) 

457 collections = self.defaults.collections 

458 backend = queries.SqlQueryBackend(self._db, self._managers) 

459 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True) 

460 if collection_wildcard.empty(): 

461 return None 

462 matched_collections = backend.resolve_collection_wildcard(collection_wildcard) 

463 parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard( 

464 datasetType, components_deprecated=False 

465 ) 

466 if len(components) > 1: 

467 raise DatasetTypeError( 

468 f"findDataset requires exactly one dataset type; got multiple components {components} " 

469 f"for parent dataset type {parent_dataset_type.name}." 

470 ) 

471 component = components[0] 

472 dataId = DataCoordinate.standardize( 

473 dataId, 

474 graph=parent_dataset_type.dimensions, 

475 universe=self.dimensions, 

476 defaults=self.defaults.dataId, 

477 **kwargs, 

478 ) 

479 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names} 

480 (filtered_collections,) = backend.filter_dataset_collections( 

481 [parent_dataset_type], 

482 matched_collections, 

483 governor_constraints=governor_constraints, 

484 ).values() 

485 if not filtered_collections: 

486 return None 

487 if timespan is None: 

488 filtered_collections = [ 

489 collection_record 

490 for collection_record in filtered_collections 

491 if collection_record.type is not CollectionType.CALIBRATION 

492 ] 

493 if filtered_collections: 

494 requested_columns = {"dataset_id", "run", "collection"} 

495 with backend.context() as context: 

496 predicate = context.make_data_coordinate_predicate( 

497 dataId.subset(parent_dataset_type.dimensions), full=False 

498 ) 

499 if timespan is not None: 

500 requested_columns.add("timespan") 

501 predicate = predicate.logical_and( 

502 context.make_timespan_overlap_predicate( 

503 DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan 

504 ) 

505 ) 

506 relation = backend.make_dataset_query_relation( 

507 parent_dataset_type, filtered_collections, requested_columns, context 

508 ).with_rows_satisfying(predicate) 

509 rows = list(context.fetch_iterable(relation)) 

510 else: 

511 rows = [] 

512 if not rows: 

513 return None 

514 elif len(rows) == 1: 

515 best_row = rows[0] 

516 else: 

517 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)} 

518 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

519 row_iter = iter(rows) 

520 best_row = next(row_iter) 

521 best_rank = rank_by_collection_key[best_row[collection_tag]] 

522 have_tie = False 

523 for row in row_iter: 

524 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank: 

525 best_row = row 

526 best_rank = rank 

527 have_tie = False 

528 elif rank == best_rank: 

529 have_tie = True 

530 assert timespan is not None, "Rank ties should be impossible given DB constraints." 

531 if have_tie: 

532 raise LookupError( 

533 f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections " 

534 f"{collection_wildcard.strings} with timespan {timespan}." 

535 ) 

536 reader = queries.DatasetRefReader( 

537 parent_dataset_type, 

538 translate_collection=lambda k: self._managers.collections[k].name, 

539 ) 

540 ref = reader.read(best_row, data_id=dataId) 

541 if component is not None: 

542 ref = ref.makeComponentRef(component) 

543 return ref 

544 
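# Example (illustrative sketch, not part of sql.py): findDataset searches the given
# collections in order and returns the first match; CALIBRATION collections are
# only considered when a timespan is supplied. All names and values below are
# hypothetical, and ``exposure_timespan`` stands in for e.g. an exposure record's
# timespan.
ref = registry.findDataset(
    "bias",
    instrument="HSC",
    detector=42,
    collections=["HSC/calib", "HSC/raw/all"],
    timespan=exposure_timespan,
)
if ref is None:
    print("No matching dataset in the searched collections.")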

545 @transactional 

546 def insertDatasets( 

547 self, 

548 datasetType: DatasetType | str, 

549 dataIds: Iterable[DataId], 

550 run: str | None = None, 

551 expand: bool = True, 

552 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

553 ) -> list[DatasetRef]: 

554 # Docstring inherited from lsst.daf.butler.registry.Registry 

555 if isinstance(datasetType, DatasetType): 

556 storage = self._managers.datasets.find(datasetType.name) 

557 if storage is None: 

558 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

559 else: 

560 storage = self._managers.datasets.find(datasetType) 

561 if storage is None: 

562 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.") 

563 if run is None: 

564 if self.defaults.run is None: 

565 raise NoDefaultCollectionError( 

566 "No run provided to insertDatasets, and no default from registry construction." 

567 ) 

568 run = self.defaults.run 

569 runRecord = self._managers.collections.find(run) 

570 if runRecord.type is not CollectionType.RUN: 

571 raise CollectionTypeError( 

572 f"Given collection is of type {runRecord.type.name}; RUN collection required." 

573 ) 

574 assert isinstance(runRecord, RunRecord) 

575 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

576 if expand: 

577 expandedDataIds = [ 

578 self.expandDataId(dataId, graph=storage.datasetType.dimensions) 

579 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") 

580 ] 

581 else: 

582 expandedDataIds = [ 

583 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds 

584 ] 

585 try: 

586 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode)) 

587 if self._managers.obscore: 

588 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

589 self._managers.obscore.add_datasets(refs, context) 

590 except sqlalchemy.exc.IntegrityError as err: 

591 raise ConflictingDefinitionError( 

592 "A database constraint failure was triggered by inserting " 

593 f"one or more datasets of type {storage.datasetType} into " 

594 f"collection '{run}'. " 

595 "This probably means a dataset with the same data ID " 

596 "and dataset type already exists, but it may also mean a " 

597 "dimension row is missing." 

598 ) from err 

599 return refs 

600 
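# Example (illustrative sketch, not part of sql.py): registering a dataset type and
# a RUN collection, then inserting datasets into it. The dimension records behind
# the data ID must already exist; all names and values are hypothetical.
from lsst.daf.butler import DatasetType

dataset_type = DatasetType(
    "example_catalog",
    dimensions=("instrument", "visit", "detector"),
    storageClass="DataFrame",
    universe=registry.dimensions,
)
registry.registerDatasetType(dataset_type)
registry.registerRun("u/example/run")
refs = registry.insertDatasets(
    dataset_type,
    dataIds=[{"instrument": "HSC", "visit": 903342, "detector": 10}],
    run="u/example/run",
)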

601 @transactional 

602 def _importDatasets( 

603 self, 

604 datasets: Iterable[DatasetRef], 

605 expand: bool = True, 

606 ) -> list[DatasetRef]: 

607 # Docstring inherited from lsst.daf.butler.registry.Registry 

608 datasets = list(datasets) 

609 if not datasets: 

610 # nothing to do 

611 return [] 

612 

613 # find dataset type 

614 datasetTypes = {dataset.datasetType for dataset in datasets} 

615 if len(datasetTypes) != 1: 

616 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}") 

617 datasetType = datasetTypes.pop() 

618 

619 # get storage handler for this dataset type 

620 storage = self._managers.datasets.find(datasetType.name) 

621 if storage is None: 

622 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

623 

624 # find run name 

625 runs = {dataset.run for dataset in datasets} 

626 if len(runs) != 1: 

627 raise ValueError(f"Multiple run names in input datasets: {runs}") 

628 run = runs.pop() 

629 

630 runRecord = self._managers.collections.find(run) 

631 if runRecord.type is not CollectionType.RUN: 

632 raise CollectionTypeError( 

633 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};" 

634 " RUN collection required." 

635 ) 

636 assert isinstance(runRecord, RunRecord) 

637 

638 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

639 if expand: 

640 expandedDatasets = [ 

641 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions)) 

642 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") 

643 ] 

644 else: 

645 expandedDatasets = [ 

646 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True) 

647 for dataset in datasets 

648 ] 

649 

650 try: 

651 refs = list(storage.import_(runRecord, expandedDatasets)) 

652 if self._managers.obscore: 

653 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

654 self._managers.obscore.add_datasets(refs, context) 

655 except sqlalchemy.exc.IntegrityError as err: 

656 raise ConflictingDefinitionError( 

657 "A database constraint failure was triggered by inserting " 

658 f"one or more datasets of type {storage.datasetType} into " 

659 f"collection '{run}'. " 

660 "This probably means a dataset with the same data ID " 

661 "and dataset type already exists, but it may also mean a " 

662 "dimension row is missing." 

663 ) from err 

664 # Check that imported dataset IDs match the input 

665 for imported_ref, input_ref in zip(refs, datasets, strict=True): 

666 if imported_ref.id != input_ref.id: 

667 raise RegistryConsistencyError( 

668 "Imported dataset ID differs from input dataset ID, " 

669 f"input ref: {input_ref}, imported ref: {imported_ref}" 

670 ) 

671 return refs 

672 

673 def getDataset(self, id: DatasetId) -> DatasetRef | None: 

674 # Docstring inherited from lsst.daf.butler.registry.Registry 

675 return self._managers.datasets.getDatasetRef(id) 

676 

677 @transactional 

678 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

679 # Docstring inherited from lsst.daf.butler.registry.Registry 

680 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

681 for datasetType, refsForType in progress.iter_item_chunks( 

682 DatasetRef.iter_by_type(refs), desc="Removing datasets by type" 

683 ): 

684 storage = self._managers.datasets[datasetType.name] 

685 try: 

686 storage.delete(refsForType) 

687 except sqlalchemy.exc.IntegrityError as err: 

688 raise OrphanedRecordError( 

689 "One or more datasets is still present in one or more Datastores." 

690 ) from err 

691 

692 @transactional 

693 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

694 # Docstring inherited from lsst.daf.butler.registry.Registry 

695 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

696 collectionRecord = self._managers.collections.find(collection) 

697 if collectionRecord.type is not CollectionType.TAGGED: 

698 raise CollectionTypeError( 

699 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED." 

700 ) 

701 for datasetType, refsForType in progress.iter_item_chunks( 

702 DatasetRef.iter_by_type(refs), desc="Associating datasets by type" 

703 ): 

704 storage = self._managers.datasets[datasetType.name] 

705 try: 

706 storage.associate(collectionRecord, refsForType) 

707 if self._managers.obscore: 

708 # If a TAGGED collection is being monitored by ObsCore 

709 # manager then we may need to save the dataset. 

710 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

711 self._managers.obscore.associate(refsForType, collectionRecord, context) 

712 except sqlalchemy.exc.IntegrityError as err: 

713 raise ConflictingDefinitionError( 

714 f"Constraint violation while associating dataset of type {datasetType.name} with " 

715 f"collection {collection}. This probably means that one or more datasets with the same " 

716 "dataset type and data ID already exist in the collection, but it may also indicate " 

717 "that the datasets do not exist." 

718 ) from err 

719 

720 @transactional 

721 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

722 # Docstring inherited from lsst.daf.butler.registry.Registry 

723 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

724 collectionRecord = self._managers.collections.find(collection) 

725 if collectionRecord.type is not CollectionType.TAGGED: 

726 raise CollectionTypeError( 

727 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED." 

728 ) 

729 for datasetType, refsForType in progress.iter_item_chunks( 

730 DatasetRef.iter_by_type(refs), desc="Disassociating datasets by type" 

731 ): 

732 storage = self._managers.datasets[datasetType.name] 

733 storage.disassociate(collectionRecord, refsForType) 

734 if self._managers.obscore: 

735 self._managers.obscore.disassociate(refsForType, collectionRecord) 

736 

737 @transactional 

738 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

739 # Docstring inherited from lsst.daf.butler.registry.Registry 

740 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

741 collectionRecord = self._managers.collections.find(collection) 

742 for datasetType, refsForType in progress.iter_item_chunks( 

743 DatasetRef.iter_by_type(refs), desc="Certifying datasets by type" 

744 ): 

745 storage = self._managers.datasets[datasetType.name] 

746 storage.certify( 

747 collectionRecord, 

748 refsForType, 

749 timespan, 

750 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

751 ) 

752 

753 @transactional 

754 def decertify( 

755 self, 

756 collection: str, 

757 datasetType: str | DatasetType, 

758 timespan: Timespan, 

759 *, 

760 dataIds: Iterable[DataId] | None = None, 

761 ) -> None: 

762 # Docstring inherited from lsst.daf.butler.registry.Registry 

763 collectionRecord = self._managers.collections.find(collection) 

764 if isinstance(datasetType, str): 

765 storage = self._managers.datasets[datasetType] 

766 else: 

767 storage = self._managers.datasets[datasetType.name] 

768 standardizedDataIds = None 

769 if dataIds is not None: 

770 standardizedDataIds = [ 

771 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds 

772 ] 

773 storage.decertify( 

774 collectionRecord, 

775 timespan, 

776 dataIds=standardizedDataIds, 

777 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

778 ) 

779 

780 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

781 """Return an object that allows a new `Datastore` instance to 

782 communicate with this `Registry`. 

783 

784 Returns 

785 ------- 

786 manager : `DatastoreRegistryBridgeManager` 

787 Object that mediates communication between this `Registry` and its 

788 associated datastores. 

789 """ 

790 return self._managers.datastores 

791 

792 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

793 # Docstring inherited from lsst.daf.butler.registry.Registry 

794 return self._managers.datastores.findDatastores(ref) 

795 

796 def expandDataId( 

797 self, 

798 dataId: DataId | None = None, 

799 *, 

800 graph: DimensionGraph | None = None, 

801 records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None, 

802 withDefaults: bool = True, 

803 **kwargs: Any, 

804 ) -> DataCoordinate: 

805 # Docstring inherited from lsst.daf.butler.registry.Registry 

806 if not withDefaults: 

807 defaults = None 

808 else: 

809 defaults = self.defaults.dataId 

810 try: 

811 standardized = DataCoordinate.standardize( 

812 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs 

813 ) 

814 except KeyError as exc: 

815 # This means either kwargs have some odd name or required 

816 # dimension is missing. 

817 raise DimensionNameError(str(exc)) from exc 

818 if standardized.hasRecords(): 

819 return standardized 

820 if records is None: 

821 records = {} 

822 elif isinstance(records, NamedKeyMapping): 

823 records = records.byName() 

824 else: 

825 records = dict(records) 

826 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

827 records.update(dataId.records.byName()) 

828 keys = standardized.byName() 

829 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

830 for element in standardized.graph.primaryKeyTraversalOrder: 

831 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

832 if record is ...: 

833 if isinstance(element, Dimension) and keys.get(element.name) is None: 

834 if element in standardized.graph.required: 

835 raise DimensionNameError( 

836 f"No value or null value for required dimension {element.name}." 

837 ) 

838 keys[element.name] = None 

839 record = None 

840 else: 

841 storage = self._managers.dimensions[element] 

842 record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context) 

843 records[element.name] = record 

844 if record is not None: 

845 for d in element.implied: 

846 value = getattr(record, d.name) 

847 if keys.setdefault(d.name, value) != value: 

848 raise InconsistentDataIdError( 

849 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

850 f"but {element.name} implies {d.name}={value!r}." 

851 ) 

852 else: 

853 if element in standardized.graph.required: 

854 raise DataIdValueError( 

855 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

856 ) 

857 if element.alwaysJoin: 

858 raise InconsistentDataIdError( 

859 f"Could not fetch record for element {element.name} via keys {keys}, ", 

860 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

861 "related.", 

862 ) 

863 for d in element.implied: 

864 keys.setdefault(d.name, None) 

865 records.setdefault(d.name, None) 

866 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records) 

867 
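# Example (illustrative sketch, not part of sql.py): expanding a minimal data ID so
# implied dimension values and full dimension records become available. Values are
# hypothetical and assume the default dimension universe.
data_id = registry.expandDataId(instrument="HSC", exposure=903342)
print(data_id["physical_filter"])            # implied value filled in from records
print(data_id.records["exposure"].timespan)  # full DimensionRecord now attached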

868 def insertDimensionData( 

869 self, 

870 element: DimensionElement | str, 

871 *data: Mapping[str, Any] | DimensionRecord, 

872 conform: bool = True, 

873 replace: bool = False, 

874 skip_existing: bool = False, 

875 ) -> None: 

876 # Docstring inherited from lsst.daf.butler.registry.Registry 

877 if conform: 

878 if isinstance(element, str): 

879 element = self.dimensions[element] 

880 records = [ 

881 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data 

882 ] 

883 else: 

884 # Ignore typing since caller said to trust them with conform=False. 

885 records = data # type: ignore 

886 storage = self._managers.dimensions[element] 

887 storage.insert(*records, replace=replace, skip_existing=skip_existing) 

888 

889 def syncDimensionData( 

890 self, 

891 element: DimensionElement | str, 

892 row: Mapping[str, Any] | DimensionRecord, 

893 conform: bool = True, 

894 update: bool = False, 

895 ) -> bool | dict[str, Any]: 

896 # Docstring inherited from lsst.daf.butler.registry.Registry 

897 if conform: 

898 if isinstance(element, str): 

899 element = self.dimensions[element] 

900 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

901 else: 

902 # Ignore typing since caller said to trust them with conform=False. 

903 record = row # type: ignore 

904 storage = self._managers.dimensions[element] 

905 return storage.sync(record, update=update) 

906 
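# Example (illustrative sketch, not part of sql.py): inserting dimension records and
# using syncDimensionData for an idempotent insert-or-update. The record fields
# shown assume the default dimension universe; all values are hypothetical.
registry.insertDimensionData(
    "instrument",
    {"name": "DummyCam", "visit_max": 1_000_000, "exposure_max": 1_000_000, "detector_max": 100},
)
registry.insertDimensionData(
    "physical_filter",
    {"instrument": "DummyCam", "name": "d-r", "band": "r"},
)
was_inserted = registry.syncDimensionData(
    "detector", {"instrument": "DummyCam", "id": 1, "full_name": "one"}
)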

907 def queryDatasetTypes( 

908 self, 

909 expression: Any = ..., 

910 *, 

911 components: bool | None = None, 

912 missing: list[str] | None = None, 

913 ) -> Iterable[DatasetType]: 

914 # Docstring inherited from lsst.daf.butler.registry.Registry 

915 wildcard = DatasetTypeWildcard.from_expression(expression) 

916 composition_dict = self._managers.datasets.resolve_wildcard( 

917 wildcard, 

918 components=components, 

919 missing=missing, 

920 ) 

921 result: list[DatasetType] = [] 

922 for parent_dataset_type, components_for_parent in composition_dict.items(): 

923 result.extend( 

924 parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type 

925 for c in components_for_parent 

926 ) 

927 return result 

928 

929 def queryCollections( 

930 self, 

931 expression: Any = ..., 

932 datasetType: DatasetType | None = None, 

933 collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(), 

934 flattenChains: bool = False, 

935 includeChains: bool | None = None, 

936 ) -> Sequence[str]: 

937 # Docstring inherited from lsst.daf.butler.registry.Registry 

938 

939 # Right now the datasetType argument is completely ignored, but that

940 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

941 # ticket will take care of that. 

942 try: 

943 wildcard = CollectionWildcard.from_expression(expression) 

944 except TypeError as exc: 

945 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc 

946 collectionTypes = ensure_iterable(collectionTypes) 

947 return [ 

948 record.name 

949 for record in self._managers.collections.resolve_wildcard( 

950 wildcard, 

951 collection_types=frozenset(collectionTypes), 

952 flatten_chains=flattenChains, 

953 include_chains=includeChains, 

954 ) 

955 ] 

956 
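# Example (illustrative sketch, not part of sql.py): introspecting the repository
# with dataset-type and collection wildcards. Patterns and names are hypothetical.
import re
from lsst.daf.butler.registry import CollectionType

flat_types = registry.queryDatasetTypes(re.compile(r"^flat.*"))
hsc_runs = registry.queryCollections("HSC/*", collectionTypes=CollectionType.RUN)
all_collections = registry.queryCollections(..., flattenChains=True)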

957 def _makeQueryBuilder( 

958 self, 

959 summary: queries.QuerySummary, 

960 doomed_by: Iterable[str] = (), 

961 ) -> queries.QueryBuilder: 

962 """Return a `QueryBuilder` instance capable of constructing and 

963 managing more complex queries than those obtainable via `Registry` 

964 interfaces. 

965 

966 This is an advanced interface; downstream code should prefer 

967 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

968 are sufficient. 

969 

970 Parameters 

971 ---------- 

972 summary : `queries.QuerySummary` 

973 Object describing and categorizing the full set of dimensions that 

974 will be included in the query. 

975 doomed_by : `~collections.abc.Iterable` of `str`, optional 

976 A list of diagnostic messages that indicate why the query is going 

977 to yield no results and should not even be executed. If an empty 

978 container (default) the query will be executed unless other code 

979 determines that it is doomed. 

980 

981 Returns 

982 ------- 

983 builder : `queries.QueryBuilder` 

984 Object that can be used to construct and perform advanced queries. 

985 """ 

986 doomed_by = list(doomed_by) 

987 backend = queries.SqlQueryBackend(self._db, self._managers) 

988 context = backend.context() 

989 relation: Relation | None = None 

990 if doomed_by: 

991 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by) 

992 return queries.QueryBuilder( 

993 summary, 

994 backend=backend, 

995 context=context, 

996 relation=relation, 

997 ) 

998 

999 def _standardize_query_data_id_args( 

1000 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any 

1001 ) -> DataCoordinate: 

1002 """Preprocess the data ID arguments passed to query* methods. 

1003 

1004 Parameters 

1005 ---------- 

1006 data_id : `DataId` or `None` 

1007 Data ID that constrains the query results. 

1008 doomed_by : `list` [ `str` ] 

1009 List to append messages indicating why the query is doomed to 

1010 yield no results. 

1011 **kwargs 

1012 Additional data ID key-value pairs, extending and overriding 

1013 ``data_id``. 

1014 

1015 Returns 

1016 ------- 

1017 data_id : `DataCoordinate` 

1018 Standardized data ID. Will be fully expanded unless expansion 

1019 fails, in which case a message will be appended to ``doomed_by`` 

1020 on return. 

1021 """ 

1022 try: 

1023 return self.expandDataId(data_id, **kwargs) 

1024 except DataIdValueError as err: 

1025 doomed_by.append(str(err)) 

1026 return DataCoordinate.standardize( 

1027 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId 

1028 ) 

1029 

1030 def _standardize_query_dataset_args( 

1031 self, 

1032 datasets: Any, 

1033 collections: CollectionArgType | None, 

1034 components: bool | None, 

1035 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain", 

1036 *, 

1037 doomed_by: list[str], 

1038 ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]: 

1039 """Preprocess dataset arguments passed to query* methods. 

1040 

1041 Parameters 

1042 ---------- 

1043 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these 

1044 Expression identifying dataset types. See `queryDatasetTypes` for 

1045 details. 

1046 collections : `str`, `re.Pattern`, or iterable of these 

1047 Expression identifying collections to be searched. See 

1048 `queryCollections` for details. 

1049 components : `bool`, optional 

1050 If `True`, apply all expression patterns to component dataset type 

1051 names as well. If `False`, never apply patterns to components. 

1052 If `None` (default), apply patterns to components only if their 

1053 parent datasets were not matched by the expression. 

1054 Fully-specified component datasets (`str` or `DatasetType` 

1055 instances) are always included. 

1056 

1057 Values other than `False` are deprecated, and only `False` will be 

1058 supported after v26. After v27 this argument will be removed 

1059 entirely. 

1060 mode : `str`, optional 

1061 The way in which datasets are being used in this query; one of: 

1062 

1063 - "find_first": this is a query for the first dataset in an 

1064 ordered list of collections. Prohibits collection wildcards, 

1065 but permits dataset type wildcards. 

1066 

1067 - "find_all": this is a query for all datasets in all matched 

1068 collections. Permits collection and dataset type wildcards. 

1069 

1070 - "constrain": this is a query for something other than datasets, 

1071 with results constrained by dataset existence. Permits 

1072 collection wildcards and prohibits ``...`` as a dataset type 

1073 wildcard. 

1074 doomed_by : `list` [ `str` ] 

1075 List to append messages indicating why the query is doomed to 

1076 yield no results. 

1077 

1078 Returns 

1079 ------- 

1080 composition : `dict` [ `DatasetType`, `list` [ `str` | `None` ] ]

1081 Dictionary mapping parent dataset type to `list` of components 

1082 matched for that dataset type (or `None` for the parent itself). 

1083 collections : `CollectionWildcard` 

1084 Processed collection expression. 

1085 """ 

1086 composition: dict[DatasetType, list[str | None]] = {} 

1087 collection_wildcard: CollectionWildcard | None = None 

1088 if datasets is not None: 

1089 if collections is None: 

1090 if not self.defaults.collections: 

1091 raise NoDefaultCollectionError("No collections, and no registry default collections.") 

1092 collection_wildcard = CollectionWildcard.from_expression(self.defaults.collections) 

1093 else: 

1094 collection_wildcard = CollectionWildcard.from_expression(collections) 

1095 if mode == "find_first" and collection_wildcard.patterns: 

1096 raise TypeError( 

1097 f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context." 

1098 ) 

1099 missing: list[str] = [] 

1100 composition = self._managers.datasets.resolve_wildcard( 

1101 datasets, components=components, missing=missing, explicit_only=(mode == "constrain") 

1102 ) 

1103 if missing and mode == "constrain": 

1104 # After v26 this should raise MissingDatasetTypeError, to be 

1105 # implemented on DM-36303. 

1106 warnings.warn( 

1107 f"Dataset type(s) {missing} are not registered; this will be an error after v26.", 

1108 FutureWarning, 

1109 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

1110 ) 

1111 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing) 

1112 elif collections: 

1113 # I think this check should actually be `collections is not None`, 

1114 # but it looks like some CLI scripts use empty tuple as default. 

1115 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") 

1116 return composition, collection_wildcard 

1117 

1118 def queryDatasets( 

1119 self, 

1120 datasetType: Any, 

1121 *, 

1122 collections: CollectionArgType | None = None, 

1123 dimensions: Iterable[Dimension | str] | None = None, 

1124 dataId: DataId | None = None, 

1125 where: str = "", 

1126 findFirst: bool = False, 

1127 components: bool | None = None, 

1128 bind: Mapping[str, Any] | None = None, 

1129 check: bool = True, 

1130 **kwargs: Any, 

1131 ) -> queries.DatasetQueryResults: 

1132 # Docstring inherited from lsst.daf.butler.registry.Registry 

1133 doomed_by: list[str] = [] 

1134 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1135 dataset_composition, collection_wildcard = self._standardize_query_dataset_args( 

1136 datasetType, 

1137 collections, 

1138 components, 

1139 mode="find_first" if findFirst else "find_all", 

1140 doomed_by=doomed_by, 

1141 ) 

1142 if collection_wildcard is not None and collection_wildcard.empty(): 

1143 doomed_by.append("No datasets can be found because collection list is empty.") 

1144 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

1145 parent_results: list[queries.ParentDatasetQueryResults] = [] 

1146 for parent_dataset_type, components_for_parent in dataset_composition.items(): 

1147 # The full set of dimensions in the query is the combination of 

1148 # those needed for the DatasetType and those explicitly requested, 

1149 # if any. 

1150 dimension_names = set(parent_dataset_type.dimensions.names) 

1151 if dimensions is not None: 

1152 dimension_names.update(self.dimensions.extract(dimensions).names) 

1153 # Construct the summary structure needed to construct a 

1154 # QueryBuilder. 

1155 summary = queries.QuerySummary( 

1156 requested=DimensionGraph(self.dimensions, names=dimension_names), 

1157 column_types=self._managers.column_types, 

1158 data_id=data_id, 

1159 expression=where, 

1160 bind=bind, 

1161 defaults=self.defaults.dataId, 

1162 check=check, 

1163 datasets=[parent_dataset_type], 

1164 ) 

1165 builder = self._makeQueryBuilder(summary) 

1166 # Add the dataset subquery to the query, telling the QueryBuilder 

1167 # to include the rank of the selected collection in the results 

1168 # only if we need to findFirst. Note that if any of the 

1169 # collections are actually wildcard expressions, and 

1170 # findFirst=True, this will raise TypeError for us. 

1171 builder.joinDataset(parent_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst) 

1172 query = builder.finish() 

1173 parent_results.append( 

1174 queries.ParentDatasetQueryResults( 

1175 query, parent_dataset_type, components=components_for_parent 

1176 ) 

1177 ) 

1178 if not parent_results: 

1179 doomed_by.extend( 

1180 f"No registered dataset type matching {t!r} found, so no matching datasets can " 

1181 "exist in any collection." 

1182 for t in ensure_iterable(datasetType) 

1183 ) 

1184 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

1185 elif len(parent_results) == 1: 

1186 return parent_results[0] 

1187 else: 

1188 return queries.ChainedDatasetQueryResults(parent_results) 

1189 
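# Example (illustrative sketch, not part of sql.py): queryDatasets with a string
# expression, bind parameters, and find-first semantics over an ordered collection
# list. All names and values are hypothetical.
refs = registry.queryDatasets(
    "calexp",
    collections=["HSC/runs/RC2", "HSC/raw/all"],
    where="instrument = 'HSC' AND detector = det",
    bind={"det": 10},
    findFirst=True,
)
for ref in set(refs):
    print(ref.run, ref.dataId)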

1190 def queryDataIds( 

1191 self, 

1192 dimensions: Iterable[Dimension | str] | Dimension | str, 

1193 *, 

1194 dataId: DataId | None = None, 

1195 datasets: Any = None, 

1196 collections: CollectionArgType | None = None, 

1197 where: str = "", 

1198 components: bool | None = None, 

1199 bind: Mapping[str, Any] | None = None, 

1200 check: bool = True, 

1201 **kwargs: Any, 

1202 ) -> queries.DataCoordinateQueryResults: 

1203 # Docstring inherited from lsst.daf.butler.registry.Registry 

1204 dimensions = ensure_iterable(dimensions) 

1205 requestedDimensions = self.dimensions.extract(dimensions) 

1206 doomed_by: list[str] = [] 

1207 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1208 dataset_composition, collection_wildcard = self._standardize_query_dataset_args( 

1209 datasets, collections, components, doomed_by=doomed_by 

1210 ) 

1211 if collection_wildcard is not None and collection_wildcard.empty(): 

1212 doomed_by.append("No data coordinates can be found because collection list is empty.") 

1213 summary = queries.QuerySummary( 

1214 requested=requestedDimensions, 

1215 column_types=self._managers.column_types, 

1216 data_id=data_id, 

1217 expression=where, 

1218 bind=bind, 

1219 defaults=self.defaults.dataId, 

1220 check=check, 

1221 datasets=dataset_composition.keys(), 

1222 ) 

1223 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

1224 for datasetType in dataset_composition: 

1225 builder.joinDataset(datasetType, collection_wildcard, isResult=False) 

1226 query = builder.finish() 

1227 

1228 return queries.DataCoordinateQueryResults(query) 

1229 
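# Example (illustrative sketch, not part of sql.py): queryDataIds constrained by
# dataset existence in a collection, then expanded so dimension records are
# attached to each result. Names and values are hypothetical.
data_ids = registry.queryDataIds(
    ["visit", "detector"],
    datasets="raw",
    collections="HSC/raw/all",
    where="instrument = 'HSC' AND visit.day_obs > 20130617",
)
for data_id in data_ids.expanded():
    print(data_id["visit"], data_id.records["visit"].exposure_time)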

1230 def queryDimensionRecords( 

1231 self, 

1232 element: DimensionElement | str, 

1233 *, 

1234 dataId: DataId | None = None, 

1235 datasets: Any = None, 

1236 collections: CollectionArgType | None = None, 

1237 where: str = "", 

1238 components: bool | None = None, 

1239 bind: Mapping[str, Any] | None = None, 

1240 check: bool = True, 

1241 **kwargs: Any, 

1242 ) -> queries.DimensionRecordQueryResults: 

1243 # Docstring inherited from lsst.daf.butler.registry.Registry 

1244 if not isinstance(element, DimensionElement): 

1245 try: 

1246 element = self.dimensions[element] 

1247 except KeyError as e: 

1248 raise DimensionNameError( 

1249 f"No such dimension '{element}', available dimensions: " 

1250 + str(self.dimensions.getStaticElements()) 

1251 ) from e 

1252 doomed_by: list[str] = [] 

1253 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1254 dataset_composition, collection_wildcard = self._standardize_query_dataset_args( 

1255 datasets, collections, components, doomed_by=doomed_by 

1256 ) 

1257 if collection_wildcard is not None and collection_wildcard.empty(): 

1258 doomed_by.append("No dimension records can be found because collection list is empty.") 

1259 summary = queries.QuerySummary( 

1260 requested=element.graph, 

1261 column_types=self._managers.column_types, 

1262 data_id=data_id, 

1263 expression=where, 

1264 bind=bind, 

1265 defaults=self.defaults.dataId, 

1266 check=check, 

1267 datasets=dataset_composition.keys(), 

1268 ) 

1269 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

1270 for datasetType in dataset_composition: 

1271 builder.joinDataset(datasetType, collection_wildcard, isResult=False) 

1272 query = builder.finish().with_record_columns(element) 

1273 return queries.DatabaseDimensionRecordQueryResults(query, element) 

1274 
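# Example (illustrative sketch, not part of sql.py): queryDimensionRecords returns
# full records (not just data ID values) for one element. The field names assume
# the default dimension universe; values are hypothetical.
records = registry.queryDimensionRecords(
    "exposure",
    where="instrument = 'HSC' AND exposure.observation_type = 'science'",
)
for record in records:
    print(record.id, record.timespan)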

1275 def queryDatasetAssociations( 

1276 self, 

1277 datasetType: str | DatasetType, 

1278 collections: CollectionArgType | None = ..., 

1279 *, 

1280 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1281 flattenChains: bool = False, 

1282 ) -> Iterator[DatasetAssociation]: 

1283 # Docstring inherited from lsst.daf.butler.registry.Registry 

1284 if collections is None: 

1285 if not self.defaults.collections: 

1286 raise NoDefaultCollectionError( 

1287 "No collections provided to queryDatasetAssociations, " 

1288 "and no defaults from registry construction." 

1289 ) 

1290 collections = self.defaults.collections 

1291 collection_wildcard = CollectionWildcard.from_expression(collections) 

1292 backend = queries.SqlQueryBackend(self._db, self._managers) 

1293 parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False) 

1294 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan") 

1295 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

1296 for parent_collection_record in backend.resolve_collection_wildcard( 

1297 collection_wildcard, 

1298 collection_types=frozenset(collectionTypes), 

1299 flatten_chains=flattenChains, 

1300 ): 

1301 # Resolve this possibly-chained collection into a list of 

1302 # non-CHAINED collections that actually hold datasets of this 

1303 # type. 

1304 candidate_collection_records = backend.resolve_dataset_collections( 

1305 parent_dataset_type, 

1306 CollectionWildcard.from_names([parent_collection_record.name]), 

1307 allow_calibration_collections=True, 

1308 governor_constraints={}, 

1309 ) 

1310 if not candidate_collection_records: 

1311 continue 

1312 with backend.context() as context: 

1313 relation = backend.make_dataset_query_relation( 

1314 parent_dataset_type, 

1315 candidate_collection_records, 

1316 columns={"dataset_id", "run", "timespan", "collection"}, 

1317 context=context, 

1318 ) 

1319 reader = queries.DatasetRefReader( 

1320 parent_dataset_type, 

1321 translate_collection=lambda k: self._managers.collections[k].name, 

1322 full=False, 

1323 ) 

1324 for row in context.fetch_iterable(relation): 

1325 ref = reader.read(row) 

1326 collection_record = self._managers.collections[row[collection_tag]] 

1327 if collection_record.type is CollectionType.CALIBRATION: 

1328 timespan = row[timespan_tag] 

1329 else: 

1330 # For backwards compatibility and (possibly?) user 

1331 # convenience we continue to define the timespan of a 

1332 # DatasetAssociation row for a non-CALIBRATION 

1333 # collection to be None rather than a fully unbounded 

1334 # timespan. 

1335 timespan = None 

1336 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan) 

1337 

1338 @property 

1339 def obsCoreTableManager(self) -> ObsCoreTableManager | None: 

1340 # Docstring inherited from lsst.daf.butler.registry.Registry 

1341 return self._managers.obscore 

1342 

1343 storageClasses: StorageClassFactory 

1344 """All storage classes known to the registry (`StorageClassFactory`). 

1345 """