Coverage for python/lsst/daf/butler/registries/sql.py: 12%

506 statements  

coverage.py v6.5.0, created at 2023-01-07 02:05 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("SqlRegistry",) 

25 

26import contextlib 

27import logging 

28import warnings 

29from typing import ( 

30 TYPE_CHECKING, 

31 Any, 

32 Dict, 

33 Iterable, 

34 Iterator, 

35 List, 

36 Literal, 

37 Mapping, 

38 Optional, 

39 Sequence, 

40 Set, 

41 Union, 

42 cast, 

43) 

44 

45import sqlalchemy 

46from lsst.daf.relation import LeafRelation, Relation 

47from lsst.resources import ResourcePathExpression 

48from lsst.utils.iteration import ensure_iterable 

49 

50from ..core import ( 

51 Config, 

52 DataCoordinate, 

53 DataId, 

54 DatasetAssociation, 

55 DatasetColumnTag, 

56 DatasetId, 

57 DatasetRef, 

58 DatasetType, 

59 Dimension, 

60 DimensionConfig, 

61 DimensionElement, 

62 DimensionGraph, 

63 DimensionRecord, 

64 DimensionUniverse, 

65 NamedKeyMapping, 

66 NameLookupMapping, 

67 Progress, 

68 StorageClassFactory, 

69 Timespan, 

70 ddl, 

71) 

72from ..core.utils import transactional 

73from ..registry import ( 

74 ArgumentError, 

75 CollectionExpressionError, 

76 CollectionSummary, 

77 CollectionType, 

78 CollectionTypeError, 

79 ConflictingDefinitionError, 

80 DataIdValueError, 

81 DatasetTypeError, 

82 DimensionNameError, 

83 InconsistentDataIdError, 

84 NoDefaultCollectionError, 

85 OrphanedRecordError, 

86 Registry, 

87 RegistryConfig, 

88 RegistryDefaults, 

89 queries, 

90) 

91from ..registry.interfaces import ChainedCollectionRecord, DatasetIdFactory, DatasetIdGenEnum, RunRecord 

92from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes 

93from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard 

94 

95if TYPE_CHECKING: 

96 from .._butlerConfig import ButlerConfig 

97 from ..registry.interfaces import CollectionRecord, Database, DatastoreRegistryBridgeManager 

98 

99 

100_LOG = logging.getLogger(__name__) 

101 

102 

103class SqlRegistry(Registry): 

104 """Registry implementation based on SQLAlchemy. 

105 

106 Parameters 

107 ---------- 

108 database : `Database` 

109 Database instance to store Registry. 

110 defaults : `RegistryDefaults` 

111 Default collection search path and/or output `~CollectionType.RUN` 

112 collection. 

113 managers : `RegistryManagerInstances` 

114 All the managers required for this registry. 

115 """ 

116 

117 defaultConfigFile: Optional[str] = None 

118 """Path to configuration defaults. Accessed within the ``configs`` resource 

119 or relative to a search path. Can be `None` if no defaults are specified. 

120 """ 

121 

122 @classmethod 

123 def createFromConfig( 

124 cls, 

125 config: Optional[Union[RegistryConfig, str]] = None, 

126 dimensionConfig: Optional[Union[DimensionConfig, str]] = None, 

127 butlerRoot: Optional[ResourcePathExpression] = None, 

128 ) -> Registry: 

129 """Create registry database and return `SqlRegistry` instance. 

130 

131 This method initializes database contents; the database must be 

132 empty prior to calling this method. 

133 

134 Parameters 

135 ---------- 

136 config : `RegistryConfig` or `str`, optional 

137 Registry configuration; if missing, the default configuration will 

138 be loaded from registry.yaml. 

139 dimensionConfig : `DimensionConfig` or `str`, optional 

140 Dimensions configuration; if missing, the default configuration 

141 will be loaded from dimensions.yaml. 

142 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

143 Path to the repository root this `SqlRegistry` will manage. 

144 

145 Returns 

146 ------- 

147 registry : `SqlRegistry` 

148 A new `SqlRegistry` instance. 

149 """ 

150 config = cls.forceRegistryConfig(config) 

151 config.replaceRoot(butlerRoot) 

152 

153 if isinstance(dimensionConfig, str): 

154 dimensionConfig = DimensionConfig(dimensionConfig) 

155 elif dimensionConfig is None: 

156 dimensionConfig = DimensionConfig() 

157 elif not isinstance(dimensionConfig, DimensionConfig): 

158 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

159 

160 DatabaseClass = config.getDatabaseClass() 

161 database = DatabaseClass.fromUri( 

162 str(config.connectionString), origin=config.get("origin", 0), namespace=config.get("namespace") 

163 ) 

164 managerTypes = RegistryManagerTypes.fromConfig(config) 

165 managers = managerTypes.makeRepo(database, dimensionConfig) 

166 return cls(database, RegistryDefaults(), managers) 

167 
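For illustration, a minimal sketch of bootstrapping a brand-new (empty) repository database with the classmethod above; the configuration file and repository root shown here are hypothetical.

    # Hedged sketch: "registry.yaml" and "/repo/example" are hypothetical inputs.
    registry = SqlRegistry.createFromConfig(
        config="registry.yaml",       # a RegistryConfig, a path to one, or None for defaults
        dimensionConfig=None,         # None loads the default dimensions.yaml
        butlerRoot="/repo/example",   # repository root used to expand relative paths
    )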

168 @classmethod 

169 def fromConfig( 

170 cls, 

171 config: Union[ButlerConfig, RegistryConfig, Config, str], 

172 butlerRoot: Optional[ResourcePathExpression] = None, 

173 writeable: bool = True, 

174 defaults: Optional[RegistryDefaults] = None, 

175 ) -> Registry: 

176 """Create `Registry` subclass instance from `config`. 

177 

178 Registry database must be initialized prior to calling this method. 

179 

180 Parameters 

181 ---------- 

182 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

183 Registry configuration. 

184 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

185 Path to the repository root this `Registry` will manage. 

186 writeable : `bool`, optional 

187 If `True` (default) create a read-write connection to the database. 

188 defaults : `RegistryDefaults`, optional 

189 Default collection search path and/or output `~CollectionType.RUN` 

190 collection. 

191 

192 Returns 

193 ------- 

194 registry : `SqlRegistry` (subclass) 

195 A new `SqlRegistry` subclass instance. 

196 """ 

197 config = cls.forceRegistryConfig(config) 

198 config.replaceRoot(butlerRoot) 

199 DatabaseClass = config.getDatabaseClass() 

200 database = DatabaseClass.fromUri( 

201 str(config.connectionString), 

202 origin=config.get("origin", 0), 

203 namespace=config.get("namespace"), 

204 writeable=writeable, 

205 ) 

206 managerTypes = RegistryManagerTypes.fromConfig(config) 

207 with database.session(): 

208 managers = managerTypes.loadRepo(database) 

209 if defaults is None: 

210 defaults = RegistryDefaults() 

211 return cls(database, defaults, managers) 

212 
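Connecting to an already-initialized repository goes through `fromConfig`; a hedged sketch with a read-only connection and a default collection search path (the config path and collection name are hypothetical, and `RegistryDefaults` is assumed to accept a collection search path).

    from lsst.daf.butler.registry import RegistryDefaults

    # Hedged sketch: "registry.yaml" and "HSC/defaults" are hypothetical.
    registry = SqlRegistry.fromConfig(
        "registry.yaml",
        writeable=False,                                   # read-only connection
        defaults=RegistryDefaults(collections="HSC/defaults"),
    )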

213 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances): 

214 self._db = database 

215 self._managers = managers 

216 self.storageClasses = StorageClassFactory() 

217 # Intentionally invoke property setter to initialize defaults. This 

218 # can only be done after most of the rest of Registry has already been 

219 # initialized, and must be done before the property getter is used. 

220 self.defaults = defaults 

221 # In the future DatasetIdFactory may become configurable and this 

222 # instance will need to be shared with datasets manager. 

223 self.datasetIdFactory = DatasetIdFactory() 

224 

225 def __str__(self) -> str: 

226 return str(self._db) 

227 

228 def __repr__(self) -> str: 

229 return f"SqlRegistry({self._db!r}, {self.dimensions!r})" 

230 

231 def isWriteable(self) -> bool: 

232 # Docstring inherited from lsst.daf.butler.registry.Registry 

233 return self._db.isWriteable() 

234 

235 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry: 

236 # Docstring inherited from lsst.daf.butler.registry.Registry 

237 if defaults is None: 

238 # No need to copy, because `RegistryDefaults` is immutable; we 

239 # effectively copy on write. 

240 defaults = self.defaults 

241 return type(self)(self._db, defaults, self._managers) 

242 

243 @property 

244 def dimensions(self) -> DimensionUniverse: 

245 # Docstring inherited from lsst.daf.butler.registry.Registry 

246 return self._managers.dimensions.universe 

247 

248 def refresh(self) -> None: 

249 # Docstring inherited from lsst.daf.butler.registry.Registry 

250 with self._db.transaction(): 

251 self._managers.refresh() 

252 

253 @contextlib.contextmanager 

254 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

255 # Docstring inherited from lsst.daf.butler.registry.Registry 

256 try: 

257 with self._db.transaction(savepoint=savepoint): 

258 yield 

259 except BaseException: 

260 # TODO: this clears the caches sometimes when we wouldn't actually 

261 # need to. Can we avoid that? 

262 self._managers.dimensions.clearCaches() 

263 raise 

264 

265 def resetConnectionPool(self) -> None: 

266 """Reset SQLAlchemy connection pool for `SqlRegistry` database. 

267 

268 This operation is useful when using the registry with fork-based 

269 multiprocessing. To use the registry across a fork boundary, one must 

270 ensure that there are no currently active connections (no session or 

271 transaction is in progress) and reset the connection pool with this 

272 method. The child process should call this method immediately 

273 after the fork. 

274 """ 

275 self._db._engine.dispose() 

276 
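The docstring above amounts to a small protocol for fork-based multiprocessing; below is a hedged sketch of one way to follow it with the standard library, where `registry`, `work`, and `data_ids` are hypothetical objects defined elsewhere.

    import multiprocessing


    def _init_worker(reg: SqlRegistry) -> None:
        # Runs in each child immediately after the fork, before any queries.
        reg.resetConnectionPool()


    # The parent must have no open session or transaction at fork time.
    ctx = multiprocessing.get_context("fork")
    with ctx.Pool(processes=4, initializer=_init_worker, initargs=(registry,)) as pool:
        results = pool.map(work, data_ids)  # `work` and `data_ids` are hypothetical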

277 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

278 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

279 other data repository client. 

280 

281 Opaque table records can be added via `insertOpaqueData`, retrieved via 

282 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

283 

284 Parameters 

285 ---------- 

286 tableName : `str` 

287 Logical name of the opaque table. This may differ from the 

288 actual name used in the database by a prefix and/or suffix. 

289 spec : `ddl.TableSpec` 

290 Specification for the table to be added. 

291 """ 

292 self._managers.opaque.register(tableName, spec) 

293 

294 @transactional 

295 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

296 """Insert records into an opaque table. 

297 

298 Parameters 

299 ---------- 

300 tableName : `str` 

301 Logical name of the opaque table. Must match the name used in a 

302 previous call to `registerOpaqueTable`. 

303 data 

304 Each additional positional argument is a dictionary that represents 

305 a single row to be added. 

306 """ 

307 self._managers.opaque[tableName].insert(*data) 

308 

309 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]: 

310 """Retrieve records from an opaque table. 

311 

312 Parameters 

313 ---------- 

314 tableName : `str` 

315 Logical name of the opaque table. Must match the name used in a 

316 previous call to `registerOpaqueTable`. 

317 where 

318 Additional keyword arguments are interpreted as equality 

319 constraints that restrict the returned rows (combined with AND); 

320 keyword names are column names and values are the values those 

321 columns must have. 

322 

323 Yields 

324 ------ 

325 row : `dict` 

326 A dictionary representing a single result row. 

327 """ 

328 yield from self._managers.opaque[tableName].fetch(**where) 

329 

330 @transactional 

331 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

332 """Remove records from an opaque table. 

333 

334 Parameters 

335 ---------- 

336 tableName : `str` 

337 Logical name of the opaque table. Must match the name used in a 

338 previous call to `registerOpaqueTable`. 

339 where 

340 Additional keyword arguments are interpreted as equality 

341 constraints that restrict the deleted rows (combined with AND); 

342 keyword names are column names and values are the values those 

343 columns must have. 

344 """ 

345 self._managers.opaque[tableName].delete(where.keys(), where) 

346 
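Taken together, the four opaque-table methods above form a small CRUD API. A minimal round-trip sketch follows, assuming `registry` is a writeable `SqlRegistry` and `spec` is a `ddl.TableSpec` describing the columns (its construction is omitted); the table and column names are hypothetical.

    registry.registerOpaqueTable("example_datastore_records", spec)
    registry.insertOpaqueData(
        "example_datastore_records",
        {"dataset_id": "1111", "path": "a.fits"},   # each positional dict is one row
        {"dataset_id": "2222", "path": "b.fits"},
    )
    rows = list(registry.fetchOpaqueData("example_datastore_records", path="b.fits"))
    registry.deleteOpaqueData("example_datastore_records", dataset_id="1111")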

347 def registerCollection( 

348 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None 

349 ) -> bool: 

350 # Docstring inherited from lsst.daf.butler.registry.Registry 

351 _, registered = self._managers.collections.register(name, type, doc=doc) 

352 return registered 

353 

354 def getCollectionType(self, name: str) -> CollectionType: 

355 # Docstring inherited from lsst.daf.butler.registry.Registry 

356 return self._managers.collections.find(name).type 

357 

358 def _get_collection_record(self, name: str) -> CollectionRecord: 

359 # Docstring inherited from lsst.daf.butler.registry.Registry 

360 return self._managers.collections.find(name) 

361 

362 def registerRun(self, name: str, doc: Optional[str] = None) -> bool: 

363 # Docstring inherited from lsst.daf.butler.registry.Registry 

364 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

365 return registered 

366 

367 @transactional 

368 def removeCollection(self, name: str) -> None: 

369 # Docstring inherited from lsst.daf.butler.registry.Registry 

370 self._managers.collections.remove(name) 

371 

372 def getCollectionChain(self, parent: str) -> tuple[str, ...]: 

373 # Docstring inherited from lsst.daf.butler.registry.Registry 

374 record = self._managers.collections.find(parent) 

375 if record.type is not CollectionType.CHAINED: 

376 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

377 assert isinstance(record, ChainedCollectionRecord) 

378 return record.children 

379 

380 @transactional 

381 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

382 # Docstring inherited from lsst.daf.butler.registry.Registry 

383 record = self._managers.collections.find(parent) 

384 if record.type is not CollectionType.CHAINED: 

385 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

386 assert isinstance(record, ChainedCollectionRecord) 

387 children = CollectionWildcard.from_expression(children).require_ordered() 

388 if children != record.children or flatten: 

389 record.update(self._managers.collections, children, flatten=flatten) 

390 

391 def getCollectionParentChains(self, collection: str) -> Set[str]: 

392 # Docstring inherited from lsst.daf.butler.registry.Registry 

393 return { 

394 record.name 

395 for record in self._managers.collections.getParentChains( 

396 self._managers.collections.find(collection).key 

397 ) 

398 } 

399 

400 def getCollectionDocumentation(self, collection: str) -> Optional[str]: 

401 # Docstring inherited from lsst.daf.butler.registry.Registry 

402 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

403 

404 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None: 

405 # Docstring inherited from lsst.daf.butler.registry.Registry 

406 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

407 

408 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

409 # Docstring inherited from lsst.daf.butler.registry.Registry 

410 record = self._managers.collections.find(collection) 

411 return self._managers.datasets.getCollectionSummary(record) 

412 

413 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

414 # Docstring inherited from lsst.daf.butler.registry.Registry 

415 _, inserted = self._managers.datasets.register(datasetType) 

416 return inserted 

417 

418 def removeDatasetType(self, name: str) -> None: 

419 # Docstring inherited from lsst.daf.butler.registry.Registry 

420 self._managers.datasets.remove(name) 

421 

422 def getDatasetType(self, name: str) -> DatasetType: 

423 # Docstring inherited from lsst.daf.butler.registry.Registry 

424 parent_name, component = DatasetType.splitDatasetTypeName(name) 

425 storage = self._managers.datasets[parent_name] 

426 if component is None: 

427 return storage.datasetType 

428 else: 

429 return storage.datasetType.makeComponentDatasetType(component) 

430 

431 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

432 # Docstring inherited from lsst.daf.butler.registry.Registry 

433 return self._managers.datasets.supportsIdGenerationMode(mode) 

434 

435 def findDataset( 

436 self, 

437 datasetType: Union[DatasetType, str], 

438 dataId: Optional[DataId] = None, 

439 *, 

440 collections: Any = None, 

441 timespan: Optional[Timespan] = None, 

442 **kwargs: Any, 

443 ) -> Optional[DatasetRef]: 

444 # Docstring inherited from lsst.daf.butler.registry.Registry 

445 if collections is None: 

446 if not self.defaults.collections: 

447 raise NoDefaultCollectionError( 

448 "No collections provided to findDataset, and no defaults from registry construction." 

449 ) 

450 collections = self.defaults.collections 

451 backend = queries.SqlQueryBackend(self._db, self._managers) 

452 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True) 

453 matched_collections = backend.resolve_collection_wildcard(collection_wildcard) 

454 parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard( 

455 datasetType, components_deprecated=False 

456 ) 

457 if len(components) > 1: 

458 raise DatasetTypeError( 

459 f"findDataset requires exactly one dataset type; got multiple components {components} " 

460 f"for parent dataset type {parent_dataset_type.name}." 

461 ) 

462 component = components[0] 

463 dataId = DataCoordinate.standardize( 

464 dataId, 

465 graph=parent_dataset_type.dimensions, 

466 universe=self.dimensions, 

467 defaults=self.defaults.dataId, 

468 **kwargs, 

469 ) 

470 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names} 

471 (filtered_collections,) = backend.filter_dataset_collections( 

472 [parent_dataset_type], 

473 matched_collections, 

474 governor_constraints=governor_constraints, 

475 ).values() 

476 if not filtered_collections: 

477 return None 

478 tail_collections: list[CollectionRecord] = [] 

479 if timespan is None: 

480 for n, collection_record in enumerate(filtered_collections): 

481 if collection_record.type is CollectionType.CALIBRATION: 

482 tail_collections.extend(filtered_collections[n:]) 

483 del filtered_collections[n:] 

484 break 

485 requested_columns = {"dataset_id", "run", "collection"} 

486 with backend.context() as context: 

487 predicate = context.make_data_coordinate_predicate( 

488 dataId.subset(parent_dataset_type.dimensions), full=False 

489 ) 

490 if timespan is not None: 

491 requested_columns.add("timespan") 

492 predicate = predicate.logical_and( 

493 context.make_timespan_overlap_predicate( 

494 DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan 

495 ) 

496 ) 

497 relation = backend.make_dataset_query_relation( 

498 parent_dataset_type, filtered_collections, requested_columns, context 

499 ).with_rows_satisfying(predicate) 

500 rows = list(context.fetch_iterable(relation)) 

501 if not rows: 

502 if tail_collections: 

503 msg = ( 

504 f"Cannot search for dataset '{parent_dataset_type.name}' in CALIBRATION collection " 

505 f"{tail_collections[0].name} without an input timespan." 

506 ) 

507 if len(tail_collections) > 1: 

508 remainder_names = ", ".join(c.name for c in tail_collections[1:]) 

509 msg += f" This also blocks searching collections [{remainder_names}] that follow it." 

510 raise TypeError(msg) 

511 return None 

512 elif len(rows) == 1: 

513 best_row = rows[0] 

514 else: 

515 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)} 

516 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

517 row_iter = iter(rows) 

518 best_row = next(row_iter) 

519 best_rank = rank_by_collection_key[best_row[collection_tag]] 

520 have_tie = False 

521 for row in row_iter: 

522 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank: 

523 best_row = row 

524 best_rank = rank 

525 have_tie = False 

526 elif rank == best_rank: 

527 have_tie = True 

528 assert timespan is not None, "Rank ties should be impossible given DB constraints." 

529 if have_tie: 

530 raise LookupError( 

531 f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections " 

532 f"{collection_wildcard.strings} with timespan {timespan}." 

533 ) 

534 reader = queries.DatasetRefReader( 

535 parent_dataset_type, 

536 translate_collection=lambda k: self._managers.collections[k].name, 

537 ) 

538 ref = reader.read(best_row, data_id=dataId) 

539 if component is not None: 

540 ref = ref.makeComponentRef(component) 

541 return ref 

542 

543 @transactional 

544 def insertDatasets( 

545 self, 

546 datasetType: Union[DatasetType, str], 

547 dataIds: Iterable[DataId], 

548 run: Optional[str] = None, 

549 expand: bool = True, 

550 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

551 ) -> List[DatasetRef]: 

552 # Docstring inherited from lsst.daf.butler.registry.Registry 

553 if isinstance(datasetType, DatasetType): 

554 storage = self._managers.datasets.find(datasetType.name) 

555 if storage is None: 

556 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

557 else: 

558 storage = self._managers.datasets.find(datasetType) 

559 if storage is None: 

560 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.") 

561 if run is None: 

562 if self.defaults.run is None: 

563 raise NoDefaultCollectionError( 

564 "No run provided to insertDatasets, and no default from registry construction." 

565 ) 

566 run = self.defaults.run 

567 runRecord = self._managers.collections.find(run) 

568 if runRecord.type is not CollectionType.RUN: 

569 raise CollectionTypeError( 

570 f"Given collection is of type {runRecord.type.name}; RUN collection required." 

571 ) 

572 assert isinstance(runRecord, RunRecord) 

573 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

574 if expand: 

575 expandedDataIds = [ 

576 self.expandDataId(dataId, graph=storage.datasetType.dimensions) 

577 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") 

578 ] 

579 else: 

580 expandedDataIds = [ 

581 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds 

582 ] 

583 try: 

584 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode)) 

585 if self._managers.obscore: 

586 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

587 self._managers.obscore.add_datasets(refs, context) 

588 except sqlalchemy.exc.IntegrityError as err: 

589 raise ConflictingDefinitionError( 

590 f"A database constraint failure was triggered by inserting " 

591 f"one or more datasets of type {storage.datasetType} into " 

592 f"collection '{run}'. " 

593 f"This probably means a dataset with the same data ID " 

594 f"and dataset type already exists, but it may also mean a " 

595 f"dimension row is missing." 

596 ) from err 

597 return refs 

598 

599 @transactional 

600 def _importDatasets( 

601 self, 

602 datasets: Iterable[DatasetRef], 

603 expand: bool = True, 

604 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

605 reuseIds: bool = False, 

606 ) -> List[DatasetRef]: 

607 # Docstring inherited from lsst.daf.butler.registry.Registry 

608 datasets = list(datasets) 

609 if not datasets: 

610 # nothing to do 

611 return [] 

612 

613 # find dataset type 

614 datasetTypes = set(dataset.datasetType for dataset in datasets) 

615 if len(datasetTypes) != 1: 

616 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}") 

617 datasetType = datasetTypes.pop() 

618 

619 # get storage handler for this dataset type 

620 storage = self._managers.datasets.find(datasetType.name) 

621 if storage is None: 

622 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

623 

624 # find run name 

625 runs = set(dataset.run for dataset in datasets) 

626 if len(runs) != 1: 

627 raise ValueError(f"Multiple run names in input datasets: {runs}") 

628 run = runs.pop() 

629 if run is None: 

630 if self.defaults.run is None: 

631 raise NoDefaultCollectionError( 

632 "No run provided to ingestDatasets, and no default from registry construction." 

633 ) 

634 run = self.defaults.run 

635 

636 runRecord = self._managers.collections.find(run) 

637 if runRecord.type is not CollectionType.RUN: 

638 raise CollectionTypeError( 

639 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};" 

640 " RUN collection required." 

641 ) 

642 assert isinstance(runRecord, RunRecord) 

643 

644 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

645 if expand: 

646 expandedDatasets = [ 

647 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions)) 

648 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") 

649 ] 

650 else: 

651 expandedDatasets = [ 

652 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True) 

653 for dataset in datasets 

654 ] 

655 

656 try: 

657 refs = list(storage.import_(runRecord, expandedDatasets, idGenerationMode, reuseIds)) 

658 if self._managers.obscore: 

659 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

660 self._managers.obscore.add_datasets(refs, context) 

661 except sqlalchemy.exc.IntegrityError as err: 

662 raise ConflictingDefinitionError( 

663 f"A database constraint failure was triggered by inserting " 

664 f"one or more datasets of type {storage.datasetType} into " 

665 f"collection '{run}'. " 

666 f"This probably means a dataset with the same data ID " 

667 f"and dataset type already exists, but it may also mean a " 

668 f"dimension row is missing." 

669 ) from err 

670 return refs 

671 

672 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]: 

673 # Docstring inherited from lsst.daf.butler.registry.Registry 

674 return self._managers.datasets.getDatasetRef(id) 

675 

676 @transactional 

677 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

678 # Docstring inherited from lsst.daf.butler.registry.Registry 

679 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

680 for datasetType, refsForType in progress.iter_item_chunks( 

681 DatasetRef.groupByType(refs).items(), desc="Removing datasets by type" 

682 ): 

683 storage = self._managers.datasets[datasetType.name] 

684 try: 

685 storage.delete(refsForType) 

686 except sqlalchemy.exc.IntegrityError as err: 

687 raise OrphanedRecordError( 

688 "One or more datasets is still present in one or more Datastores." 

689 ) from err 

690 

691 @transactional 

692 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

693 # Docstring inherited from lsst.daf.butler.registry.Registry 

694 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

695 collectionRecord = self._managers.collections.find(collection) 

696 if collectionRecord.type is not CollectionType.TAGGED: 

697 raise CollectionTypeError( 

698 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED." 

699 ) 

700 for datasetType, refsForType in progress.iter_item_chunks( 

701 DatasetRef.groupByType(refs).items(), desc="Associating datasets by type" 

702 ): 

703 storage = self._managers.datasets[datasetType.name] 

704 try: 

705 storage.associate(collectionRecord, refsForType) 

706 if self._managers.obscore: 

707 # If a TAGGED collection is being monitored by ObsCore 

708 # manager then we may need to save the dataset. 

709 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

710 self._managers.obscore.associate(refsForType, collectionRecord, context) 

711 except sqlalchemy.exc.IntegrityError as err: 

712 raise ConflictingDefinitionError( 

713 f"Constraint violation while associating dataset of type {datasetType.name} with " 

714 f"collection {collection}. This probably means that one or more datasets with the same " 

715 f"dataset type and data ID already exist in the collection, but it may also indicate " 

716 f"that the datasets do not exist." 

717 ) from err 

718 

719 @transactional 

720 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

721 # Docstring inherited from lsst.daf.butler.registry.Registry 

722 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

723 collectionRecord = self._managers.collections.find(collection) 

724 if collectionRecord.type is not CollectionType.TAGGED: 

725 raise CollectionTypeError( 

726 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED." 

727 ) 

728 for datasetType, refsForType in progress.iter_item_chunks( 

729 DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type" 

730 ): 

731 storage = self._managers.datasets[datasetType.name] 

732 storage.disassociate(collectionRecord, refsForType) 

733 if self._managers.obscore: 

734 self._managers.obscore.disassociate(refsForType, collectionRecord) 

735 

736 @transactional 

737 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

738 # Docstring inherited from lsst.daf.butler.registry.Registry 

739 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

740 collectionRecord = self._managers.collections.find(collection) 

741 for datasetType, refsForType in progress.iter_item_chunks( 

742 DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type" 

743 ): 

744 storage = self._managers.datasets[datasetType.name] 

745 storage.certify( 

746 collectionRecord, 

747 refsForType, 

748 timespan, 

749 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

750 ) 

751 

752 @transactional 

753 def decertify( 

754 self, 

755 collection: str, 

756 datasetType: Union[str, DatasetType], 

757 timespan: Timespan, 

758 *, 

759 dataIds: Optional[Iterable[DataId]] = None, 

760 ) -> None: 

761 # Docstring inherited from lsst.daf.butler.registry.Registry 

762 collectionRecord = self._managers.collections.find(collection) 

763 if isinstance(datasetType, str): 

764 storage = self._managers.datasets[datasetType] 

765 else: 

766 storage = self._managers.datasets[datasetType.name] 

767 standardizedDataIds = None 

768 if dataIds is not None: 

769 standardizedDataIds = [ 

770 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds 

771 ] 

772 storage.decertify( 

773 collectionRecord, 

774 timespan, 

775 dataIds=standardizedDataIds, 

776 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

777 ) 

778 

779 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

780 """Return an object that allows a new `Datastore` instance to 

781 communicate with this `Registry`. 

782 

783 Returns 

784 ------- 

785 manager : `DatastoreRegistryBridgeManager` 

786 Object that mediates communication between this `Registry` and its 

787 associated datastores. 

788 """ 

789 return self._managers.datastores 

790 

791 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

792 # Docstring inherited from lsst.daf.butler.registry.Registry 

793 return self._managers.datastores.findDatastores(ref) 

794 

795 def expandDataId( 

796 self, 

797 dataId: Optional[DataId] = None, 

798 *, 

799 graph: Optional[DimensionGraph] = None, 

800 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None, 

801 withDefaults: bool = True, 

802 **kwargs: Any, 

803 ) -> DataCoordinate: 

804 # Docstring inherited from lsst.daf.butler.registry.Registry 

805 if not withDefaults: 

806 defaults = None 

807 else: 

808 defaults = self.defaults.dataId 

809 try: 

810 standardized = DataCoordinate.standardize( 

811 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs 

812 ) 

813 except KeyError as exc: 

814 # This means either a kwarg has an unexpected name or a required 

815 # dimension is missing. 

816 raise DimensionNameError(str(exc)) from exc 

817 if standardized.hasRecords(): 

818 return standardized 

819 if records is None: 

820 records = {} 

821 elif isinstance(records, NamedKeyMapping): 

822 records = records.byName() 

823 else: 

824 records = dict(records) 

825 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

826 records.update(dataId.records.byName()) 

827 keys = standardized.byName() 

828 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

829 for element in standardized.graph.primaryKeyTraversalOrder: 

830 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

831 if record is ...: 

832 if isinstance(element, Dimension) and keys.get(element.name) is None: 

833 if element in standardized.graph.required: 

834 raise DimensionNameError( 

835 f"No value or null value for required dimension {element.name}." 

836 ) 

837 keys[element.name] = None 

838 record = None 

839 else: 

840 storage = self._managers.dimensions[element] 

841 record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context) 

842 records[element.name] = record 

843 if record is not None: 

844 for d in element.implied: 

845 value = getattr(record, d.name) 

846 if keys.setdefault(d.name, value) != value: 

847 raise InconsistentDataIdError( 

848 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

849 f"but {element.name} implies {d.name}={value!r}." 

850 ) 

851 else: 

852 if element in standardized.graph.required: 

853 raise DataIdValueError( 

854 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

855 ) 

856 if element.alwaysJoin: 

857 raise InconsistentDataIdError( 

858 f"Could not fetch record for element {element.name} via keys {keys}, ", 

859 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

860 "related.", 

861 ) 

862 for d in element.implied: 

863 keys.setdefault(d.name, None) 

864 records.setdefault(d.name, None) 

865 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records) 

866 

867 def insertDimensionData( 

868 self, 

869 element: Union[DimensionElement, str], 

870 *data: Union[Mapping[str, Any], DimensionRecord], 

871 conform: bool = True, 

872 replace: bool = False, 

873 skip_existing: bool = False, 

874 ) -> None: 

875 # Docstring inherited from lsst.daf.butler.registry.Registry 

876 if conform: 

877 if isinstance(element, str): 

878 element = self.dimensions[element] 

879 records = [ 

880 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data 

881 ] 

882 else: 

883 # Ignore typing since caller said to trust them with conform=False. 

884 records = data # type: ignore 

885 storage = self._managers.dimensions[element] 

886 storage.insert(*records, replace=replace, skip_existing=skip_existing) 

887 

888 def syncDimensionData( 

889 self, 

890 element: Union[DimensionElement, str], 

891 row: Union[Mapping[str, Any], DimensionRecord], 

892 conform: bool = True, 

893 update: bool = False, 

894 ) -> Union[bool, Dict[str, Any]]: 

895 # Docstring inherited from lsst.daf.butler.registry.Registry 

896 if conform: 

897 if isinstance(element, str): 

898 element = self.dimensions[element] 

899 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

900 else: 

901 # Ignore typing since caller said to trust them with conform=False. 

902 record = row # type: ignore 

903 storage = self._managers.dimensions[element] 

904 return storage.sync(record, update=update) 

905 

906 def queryDatasetTypes( 

907 self, 

908 expression: Any = ..., 

909 *, 

910 components: Optional[bool] = None, 

911 missing: Optional[List[str]] = None, 

912 ) -> Iterable[DatasetType]: 

913 # Docstring inherited from lsst.daf.butler.registry.Registry 

914 wildcard = DatasetTypeWildcard.from_expression(expression) 

915 composition_dict = self._managers.datasets.resolve_wildcard( 

916 wildcard, 

917 components=components, 

918 missing=missing, 

919 ) 

920 result: list[DatasetType] = [] 

921 for parent_dataset_type, components_for_parent in composition_dict.items(): 

922 result.extend( 

923 parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type 

924 for c in components_for_parent 

925 ) 

926 return result 

927 

928 def queryCollections( 

929 self, 

930 expression: Any = ..., 

931 datasetType: Optional[DatasetType] = None, 

932 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(), 

933 flattenChains: bool = False, 

934 includeChains: Optional[bool] = None, 

935 ) -> Sequence[str]: 

936 # Docstring inherited from lsst.daf.butler.registry.Registry 

937 

938 # Right now the datasetTypes argument is completely ignored, but that 

939 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

940 # ticket will take care of that. 

941 try: 

942 wildcard = CollectionWildcard.from_expression(expression) 

943 except TypeError as exc: 

944 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc 

945 collectionTypes = ensure_iterable(collectionTypes) 

946 return [ 

947 record.name 

948 for record in self._managers.collections.resolve_wildcard( 

949 wildcard, 

950 collection_types=frozenset(collectionTypes), 

951 flatten_chains=flattenChains, 

952 include_chains=includeChains, 

953 ) 

954 ] 

955 

956 def _makeQueryBuilder( 

957 self, 

958 summary: queries.QuerySummary, 

959 doomed_by: Iterable[str] = (), 

960 ) -> queries.QueryBuilder: 

961 """Return a `QueryBuilder` instance capable of constructing and 

962 managing more complex queries than those obtainable via `Registry` 

963 interfaces. 

964 

965 This is an advanced interface; downstream code should prefer 

966 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

967 are sufficient. 

968 

969 Parameters 

970 ---------- 

971 summary : `queries.QuerySummary` 

972 Object describing and categorizing the full set of dimensions that 

973 will be included in the query. 

974 doomed_by : `Iterable` of `str`, optional 

975 A list of diagnostic messages that indicate why the query is going 

976 to yield no results and should not even be executed. If an empty 

977 container (default) the query will be executed unless other code 

978 determines that it is doomed. 

979 

980 Returns 

981 ------- 

982 builder : `queries.QueryBuilder` 

983 Object that can be used to construct and perform advanced queries. 

984 """ 

985 doomed_by = list(doomed_by) 

986 backend = queries.SqlQueryBackend(self._db, self._managers) 

987 context = backend.context() 

988 relation: Relation | None = None 

989 if doomed_by: 

990 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by) 

991 return queries.QueryBuilder( 

992 summary, 

993 backend=backend, 

994 context=context, 

995 relation=relation, 

996 ) 

997 
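A hedged sketch of the advanced pattern this helper enables, mirroring what `queryDataIds` does later in this module; the dimension names are hypothetical, `registry` is an existing `SqlRegistry`, and `queries` is the `lsst.daf.butler.registry.queries` module imported at the top of this file.

    summary = queries.QuerySummary(
        requested=registry.dimensions.extract(["instrument", "detector"]),
        data_id=registry.expandDataId(),   # empty data ID; a real one would constrain the query
        expression="",
        bind=None,
        defaults=registry.defaults.dataId,
        check=True,
        datasets=[],
    )
    builder = registry._makeQueryBuilder(summary)
    query = builder.finish()
    results = queries.DataCoordinateQueryResults(query)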

998 def _standardize_query_data_id_args( 

999 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any 

1000 ) -> DataCoordinate: 

1001 """Preprocess the data ID arguments passed to query* methods. 

1002 

1003 Parameters 

1004 ---------- 

1005 data_id : `DataId` or `None` 

1006 Data ID that constrains the query results. 

1007 doomed_by : `list` [ `str` ] 

1008 List to append messages indicating why the query is doomed to 

1009 yield no results. 

1010 **kwargs 

1011 Additional data ID key-value pairs, extending and overriding 

1012 ``data_id``. 

1013 

1014 Returns 

1015 ------- 

1016 data_id : `DataCoordinate` 

1017 Standardized data ID. Will be fully expanded unless expansion 

1018 fails, in which case a message will be appended to ``doomed_by`` 

1019 on return. 

1020 """ 

1021 try: 

1022 return self.expandDataId(data_id, **kwargs) 

1023 except DataIdValueError as err: 

1024 doomed_by.append(str(err)) 

1025 return DataCoordinate.standardize( 

1026 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId 

1027 ) 

1028 

1029 def _standardize_query_dataset_args( 

1030 self, 

1031 datasets: Any, 

1032 collections: Any, 

1033 components: bool | None, 

1034 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain", 

1035 *, 

1036 doomed_by: list[str], 

1037 ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]: 

1038 """Preprocess dataset arguments passed to query* methods. 

1039 

1040 Parameters 

1041 ---------- 

1042 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these 

1043 Expression identifying dataset types. See `queryDatasetTypes` for 

1044 details. 

1045 collections : `str`, `re.Pattern`, or iterable of these 

1046 Expression identifying collections to be searched. See 

1047 `queryCollections` for details. 

1048 components : `bool`, optional 

1049 If `True`, apply all expression patterns to component dataset type 

1050 names as well. If `False`, never apply patterns to components. 

1051 If `None` (default), apply patterns to components only if their 

1052 parent datasets were not matched by the expression. 

1053 Fully-specified component datasets (`str` or `DatasetType` 

1054 instances) are always included. 

1055 

1056 Values other than `False` are deprecated, and only `False` will be 

1057 supported after v26. After v27 this argument will be removed 

1058 entirely. 

1059 mode : `str`, optional 

1060 The way in which datasets are being used in this query; one of: 

1061 

1062 - "find_first": this is a query for the first dataset in an 

1063 ordered list of collections. Prohibits collection wildcards, 

1064 but permits dataset type wildcards. 

1065 

1066 - "find_all": this is a query for all datasets in all matched 

1067 collections. Permits collection and dataset type wildcards. 

1068 

1069 - "constrain": this is a query for something other than datasets, 

1070 with results constrained by dataset existence. Permits 

1071 collection wildcards and prohibits ``...`` as a dataset type 

1072 wildcard. 

1073 doomed_by : `list` [ `str` ] 

1074 List to append messages indicating why the query is doomed to 

1075 yield no results. 

1076 

1077 Returns 

1078 ------- 

1079 composition : `dict` [ `DatasetType`, `list` [ `str` or `None` ] ] 

1080 Dictionary mapping parent dataset type to `list` of components 

1081 matched for that dataset type (or `None` for the parent itself). 

1082 collections : `CollectionWildcard` or `None` 

1083 Processed collection expression; `None` when no dataset expression is given. 

1084 """ 

1085 composition: dict[DatasetType, list[str | None]] = {} 

1086 if datasets is not None: 

1087 if not collections: 

1088 if not self.defaults.collections: 

1089 raise NoDefaultCollectionError("No collections, and no registry default collections.") 

1090 collections = self.defaults.collections 

1091 else: 

1092 collections = CollectionWildcard.from_expression(collections) 

1093 if mode == "find_first" and collections.patterns: 

1094 raise TypeError( 

1095 f"Collection pattern(s) {collections.patterns} not allowed in this context." 

1096 ) 

1097 missing: list[str] = [] 

1098 composition = self._managers.datasets.resolve_wildcard( 

1099 datasets, components=components, missing=missing, explicit_only=(mode == "constrain") 

1100 ) 

1101 if missing and mode == "constrain": 

1102 # After v26 this should raise MissingDatasetTypeError, to be 

1103 # implemented on DM-36303. 

1104 warnings.warn( 

1105 f"Dataset type(s) {missing} are not registered; this will be an error after v26.", 

1106 FutureWarning, 

1107 ) 

1108 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing) 

1109 elif collections: 

1110 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") 

1111 return composition, collections 

1112 

1113 def queryDatasets( 

1114 self, 

1115 datasetType: Any, 

1116 *, 

1117 collections: Any = None, 

1118 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

1119 dataId: Optional[DataId] = None, 

1120 where: str = "", 

1121 findFirst: bool = False, 

1122 components: Optional[bool] = None, 

1123 bind: Optional[Mapping[str, Any]] = None, 

1124 check: bool = True, 

1125 **kwargs: Any, 

1126 ) -> queries.DatasetQueryResults: 

1127 # Docstring inherited from lsst.daf.butler.registry.Registry 

1128 doomed_by: list[str] = [] 

1129 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1130 dataset_composition, collections = self._standardize_query_dataset_args( 

1131 datasetType, 

1132 collections, 

1133 components, 

1134 mode="find_first" if findFirst else "find_all", 

1135 doomed_by=doomed_by, 

1136 ) 

1137 parent_results: list[queries.ParentDatasetQueryResults] = [] 

1138 for parent_dataset_type, components_for_parent in dataset_composition.items(): 

1139 # The full set of dimensions in the query is the combination of 

1140 # those needed for the DatasetType and those explicitly requested, 

1141 # if any. 

1142 dimension_names = set(parent_dataset_type.dimensions.names) 

1143 if dimensions is not None: 

1144 dimension_names.update(self.dimensions.extract(dimensions).names) 

1145 # Construct the summary structure needed to construct a 

1146 # QueryBuilder. 

1147 summary = queries.QuerySummary( 

1148 requested=DimensionGraph(self.dimensions, names=dimension_names), 

1149 data_id=data_id, 

1150 expression=where, 

1151 bind=bind, 

1152 defaults=self.defaults.dataId, 

1153 check=check, 

1154 datasets=[parent_dataset_type], 

1155 ) 

1156 builder = self._makeQueryBuilder(summary) 

1157 # Add the dataset subquery to the query, telling the QueryBuilder 

1158 # to include the rank of the selected collection in the results 

1159 # only if we need to findFirst. Note that if any of the 

1160 # collections are actually wildcard expressions, and 

1161 # findFirst=True, this will raise TypeError for us. 

1162 builder.joinDataset(parent_dataset_type, collections, isResult=True, findFirst=findFirst) 

1163 query = builder.finish() 

1164 parent_results.append( 

1165 queries.ParentDatasetQueryResults( 

1166 query, parent_dataset_type, components=components_for_parent 

1167 ) 

1168 ) 

1169 if not parent_results: 

1170 doomed_by.extend( 

1171 f"No registered dataset type matching {t!r} found, so no matching datasets can " 

1172 "exist in any collection." 

1173 for t in ensure_iterable(datasetType) 

1174 ) 

1175 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

1176 elif len(parent_results) == 1: 

1177 return parent_results[0] 

1178 else: 

1179 return queries.ChainedDatasetQueryResults(parent_results) 

1180 

1181 def queryDataIds( 

1182 self, 

1183 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], 

1184 *, 

1185 dataId: Optional[DataId] = None, 

1186 datasets: Any = None, 

1187 collections: Any = None, 

1188 where: str = "", 

1189 components: Optional[bool] = None, 

1190 bind: Optional[Mapping[str, Any]] = None, 

1191 check: bool = True, 

1192 **kwargs: Any, 

1193 ) -> queries.DataCoordinateQueryResults: 

1194 # Docstring inherited from lsst.daf.butler.registry.Registry 

1195 dimensions = ensure_iterable(dimensions) 

1196 requestedDimensions = self.dimensions.extract(dimensions) 

1197 doomed_by: list[str] = [] 

1198 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1199 dataset_composition, collections = self._standardize_query_dataset_args( 

1200 datasets, collections, components, doomed_by=doomed_by 

1201 ) 

1202 summary = queries.QuerySummary( 

1203 requested=requestedDimensions, 

1204 data_id=data_id, 

1205 expression=where, 

1206 bind=bind, 

1207 defaults=self.defaults.dataId, 

1208 check=check, 

1209 datasets=dataset_composition.keys(), 

1210 ) 

1211 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

1212 for datasetType in dataset_composition.keys(): 

1213 builder.joinDataset(datasetType, collections, isResult=False) 

1214 query = builder.finish() 

1215 

1216 return queries.DataCoordinateQueryResults(query) 

1217 

1218 def queryDimensionRecords( 

1219 self, 

1220 element: Union[DimensionElement, str], 

1221 *, 

1222 dataId: Optional[DataId] = None, 

1223 datasets: Any = None, 

1224 collections: Any = None, 

1225 where: str = "", 

1226 components: Optional[bool] = None, 

1227 bind: Optional[Mapping[str, Any]] = None, 

1228 check: bool = True, 

1229 **kwargs: Any, 

1230 ) -> queries.DimensionRecordQueryResults: 

1231 # Docstring inherited from lsst.daf.butler.registry.Registry 

1232 if not isinstance(element, DimensionElement): 

1233 try: 

1234 element = self.dimensions[element] 

1235 except KeyError as e: 

1236 raise DimensionNameError( 

1237 f"No such dimension '{element}', available dimensions: " 

1238 + str(self.dimensions.getStaticElements()) 

1239 ) from e 

1240 doomed_by: list[str] = [] 

1241 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1242 dataset_composition, collections = self._standardize_query_dataset_args( 

1243 datasets, collections, components, doomed_by=doomed_by 

1244 ) 

1245 summary = queries.QuerySummary( 

1246 requested=element.graph, 

1247 data_id=data_id, 

1248 expression=where, 

1249 bind=bind, 

1250 defaults=self.defaults.dataId, 

1251 check=check, 

1252 datasets=dataset_composition.keys(), 

1253 ) 

1254 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

1255 for datasetType in dataset_composition.keys(): 

1256 builder.joinDataset(datasetType, collections, isResult=False) 

1257 query = builder.finish().with_record_columns(element) 

1258 return queries.DatabaseDimensionRecordQueryResults(query, element) 

1259 

1260 def queryDatasetAssociations( 

1261 self, 

1262 datasetType: Union[str, DatasetType], 

1263 collections: Any = ..., 

1264 *, 

1265 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1266 flattenChains: bool = False, 

1267 ) -> Iterator[DatasetAssociation]: 

1268 # Docstring inherited from lsst.daf.butler.registry.Registry 

1269 if collections is None: 

1270 if not self.defaults.collections: 

1271 raise NoDefaultCollectionError( 

1272 "No collections provided to queryDatasetAssociations, " 

1273 "and no defaults from registry construction." 

1274 ) 

1275 collections = self.defaults.collections 

1276 collections = CollectionWildcard.from_expression(collections) 

1277 backend = queries.SqlQueryBackend(self._db, self._managers) 

1278 parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False) 

1279 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan") 

1280 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

1281 for parent_collection_record in backend.resolve_collection_wildcard( 

1282 collections, 

1283 collection_types=frozenset(collectionTypes), 

1284 flatten_chains=flattenChains, 

1285 ): 

1286 # Resolve this possibly-chained collection into a list of 

1287 # non-CHAINED collections that actually hold datasets of this 

1288 # type. 

1289 candidate_collection_records = backend.resolve_dataset_collections( 

1290 parent_dataset_type, 

1291 CollectionWildcard.from_names([parent_collection_record.name]), 

1292 allow_calibration_collections=True, 

1293 governor_constraints={}, 

1294 ) 

1295 if not candidate_collection_records: 

1296 continue 

1297 with backend.context() as context: 

1298 relation = backend.make_dataset_query_relation( 

1299 parent_dataset_type, 

1300 candidate_collection_records, 

1301 columns={"dataset_id", "run", "timespan", "collection"}, 

1302 context=context, 

1303 ) 

1304 reader = queries.DatasetRefReader( 

1305 parent_dataset_type, 

1306 translate_collection=lambda k: self._managers.collections[k].name, 

1307 full=False, 

1308 ) 

1309 for row in context.fetch_iterable(relation): 

1310 ref = reader.read(row) 

1311 collection_record = self._managers.collections[row[collection_tag]] 

1312 if collection_record.type is CollectionType.CALIBRATION: 

1313 timespan = row[timespan_tag] 

1314 else: 

1315 # For backwards compatibility and (possibly?) user 

1316 # convenience we continue to define the timespan of a 

1317 # DatasetAssociation row for a non-CALIBRATION 

1318 # collection to be None rather than a fully unbounded 

1319 # timespan. 

1320 timespan = None 

1321 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan) 

1322 

1323 storageClasses: StorageClassFactory 

1324 """All storage classes known to the registry (`StorageClassFactory`). 

1325 """