Coverage for python/lsst/daf/butler/registries/sql.py: 13%

503 statements  


1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("SqlRegistry",) 

25 

26import contextlib 

27import logging 

28import warnings 

29from typing import ( 

30 TYPE_CHECKING, 

31 Any, 

32 Dict, 

33 Iterable, 

34 Iterator, 

35 List, 

36 Literal, 

37 Mapping, 

38 Optional, 

39 Sequence, 

40 Set, 

41 Union, 

42 cast, 

43) 

44 

45import sqlalchemy 

46from lsst.daf.relation import LeafRelation, Relation 

47from lsst.resources import ResourcePathExpression 

48from lsst.utils.iteration import ensure_iterable 

49 

50from ..core import ( 

51 Config, 

52 DataCoordinate, 

53 DataId, 

54 DatasetAssociation, 

55 DatasetColumnTag, 

56 DatasetId, 

57 DatasetIdFactory, 

58 DatasetIdGenEnum, 

59 DatasetRef, 

60 DatasetType, 

61 Dimension, 

62 DimensionConfig, 

63 DimensionElement, 

64 DimensionGraph, 

65 DimensionRecord, 

66 DimensionUniverse, 

67 NamedKeyMapping, 

68 NameLookupMapping, 

69 Progress, 

70 StorageClassFactory, 

71 Timespan, 

72 ddl, 

73) 

74from ..core.utils import transactional 

75from ..registry import ( 

76 ArgumentError, 

77 CollectionExpressionError, 

78 CollectionSummary, 

79 CollectionType, 

80 CollectionTypeError, 

81 ConflictingDefinitionError, 

82 DataIdValueError, 

83 DatasetTypeError, 

84 DimensionNameError, 

85 InconsistentDataIdError, 

86 NoDefaultCollectionError, 

87 OrphanedRecordError, 

88 Registry, 

89 RegistryConfig, 

90 RegistryDefaults, 

91 queries, 

92) 

93from ..registry.interfaces import ChainedCollectionRecord, RunRecord 

94from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes 

95from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard 

96 

97if TYPE_CHECKING: 

98 from .._butlerConfig import ButlerConfig 

99 from ..registry.interfaces import ( 

100 CollectionRecord, 

101 Database, 

102 DatastoreRegistryBridgeManager, 

103 ObsCoreTableManager, 

104 ) 

105 

106 

107_LOG = logging.getLogger(__name__) 

108 

109 

110class SqlRegistry(Registry): 

111 """Registry implementation based on SQLAlchemy. 

112 

113 Parameters 

114 ---------- 

115 database : `Database` 

116 Database instance in which to store Registry data. 

117 defaults : `RegistryDefaults` 

118 Default collection search path and/or output `~CollectionType.RUN` 

119 collection. 

120 managers : `RegistryManagerInstances` 

121 All the managers required for this registry. 

122 """ 

123 

124 defaultConfigFile: Optional[str] = None 

125 """Path to configuration defaults. Accessed within the ``configs`` resource 

126 or relative to a search path. Can be `None` if no defaults are specified. 

127 """ 

128 

129 @classmethod 

130 def createFromConfig( 

131 cls, 

132 config: Optional[Union[RegistryConfig, str]] = None, 

133 dimensionConfig: Optional[Union[DimensionConfig, str]] = None, 

134 butlerRoot: Optional[ResourcePathExpression] = None, 

135 ) -> Registry: 

136 """Create registry database and return `SqlRegistry` instance. 

137 

138 This method initializes database contents; the database must be empty 

139 prior to calling this method. 

140 

141 Parameters 

142 ---------- 

143 config : `RegistryConfig` or `str`, optional 

144 Registry configuration. If missing, the default configuration will 

145 be loaded from ``registry.yaml``. 

146 dimensionConfig : `DimensionConfig` or `str`, optional 

147 Dimensions configuration. If missing, the default configuration 

148 will be loaded from ``dimensions.yaml``. 

149 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

150 Path to the repository root this `SqlRegistry` will manage. 

151 

152 Returns 

153 ------- 

154 registry : `SqlRegistry` 

155 A new `SqlRegistry` instance. 

156 """ 

157 config = cls.forceRegistryConfig(config) 

158 config.replaceRoot(butlerRoot) 

159 

160 if isinstance(dimensionConfig, str): 

161 dimensionConfig = DimensionConfig(dimensionConfig) 

162 elif dimensionConfig is None: 

163 dimensionConfig = DimensionConfig() 

164 elif not isinstance(dimensionConfig, DimensionConfig): 

165 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

166 

167 DatabaseClass = config.getDatabaseClass() 

168 database = DatabaseClass.fromUri( 

169 str(config.connectionString), origin=config.get("origin", 0), namespace=config.get("namespace") 

170 ) 

171 managerTypes = RegistryManagerTypes.fromConfig(config) 

172 managers = managerTypes.makeRepo(database, dimensionConfig) 

173 return cls(database, RegistryDefaults(), managers) 

174 
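# Usage sketch (illustrative): creating a new, empty registry database with
# ``createFromConfig``, assuming a local SQLite database and the default
# dimension configuration.  The repository root below is a hypothetical path.
def _example_create_registry() -> Registry:
    config = RegistryConfig()
    config["db"] = "sqlite:////tmp/example_repo/gen3.sqlite3"
    return SqlRegistry.createFromConfig(config, butlerRoot="/tmp/example_repo")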

175 @classmethod 

176 def fromConfig( 

177 cls, 

178 config: Union[ButlerConfig, RegistryConfig, Config, str], 

179 butlerRoot: Optional[ResourcePathExpression] = None, 

180 writeable: bool = True, 

181 defaults: Optional[RegistryDefaults] = None, 

182 ) -> Registry: 

183 """Create `Registry` subclass instance from `config`. 

184 

185 Registry database must be initialized prior to calling this method. 

186 

187 Parameters 

188 ---------- 

189 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

190 Registry configuration. 

191 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

192 Path to the repository root this `Registry` will manage. 

193 writeable : `bool`, optional 

194 If `True` (default) create a read-write connection to the database. 

195 defaults : `RegistryDefaults`, optional 

196 Default collection search path and/or output `~CollectionType.RUN` 

197 collection. 

198 

199 Returns 

200 ------- 

201 registry : `SqlRegistry` (subclass) 

202 A new `SqlRegistry` subclass instance. 

203 """ 

204 config = cls.forceRegistryConfig(config) 

205 config.replaceRoot(butlerRoot) 

206 DatabaseClass = config.getDatabaseClass() 

207 database = DatabaseClass.fromUri( 

208 config.connectionString.render_as_string(hide_password=False), 

209 origin=config.get("origin", 0), 

210 namespace=config.get("namespace"), 

211 writeable=writeable, 

212 ) 

213 managerTypes = RegistryManagerTypes.fromConfig(config) 

214 with database.session(): 

215 managers = managerTypes.loadRepo(database) 

216 if defaults is None: 

217 defaults = RegistryDefaults() 

218 return cls(database, defaults, managers) 

219 
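# Usage sketch (illustrative): opening an existing repository read-only via
# ``fromConfig``.  The repository path is hypothetical; in practice the
# configuration usually comes from a ``ButlerConfig`` built from the
# repository root.
from lsst.daf.butler import ButlerConfig  # runtime import; the one above is TYPE_CHECKING-only

def _example_connect_readonly() -> Registry:
    return SqlRegistry.fromConfig(ButlerConfig("/tmp/example_repo"), writeable=False)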

220 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances): 

221 self._db = database 

222 self._managers = managers 

223 self.storageClasses = StorageClassFactory() 

224 # Intentionally invoke property setter to initialize defaults. This 

225 # can only be done after most of the rest of Registry has already been 

226 # initialized, and must be done before the property getter is used. 

227 self.defaults = defaults 

228 # In the future DatasetIdFactory may become configurable and this 

229 # instance will need to be shared with datasets manager. 

230 self.datasetIdFactory = DatasetIdFactory() 

231 

232 def __str__(self) -> str: 

233 return str(self._db) 

234 

235 def __repr__(self) -> str: 

236 return f"SqlRegistry({self._db!r}, {self.dimensions!r})" 

237 

238 def isWriteable(self) -> bool: 

239 # Docstring inherited from lsst.daf.butler.registry.Registry 

240 return self._db.isWriteable() 

241 

242 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry: 

243 # Docstring inherited from lsst.daf.butler.registry.Registry 

244 if defaults is None: 

245 # No need to copy, because `RegistryDefaults` is immutable; we 

246 # effectively copy on write. 

247 defaults = self.defaults 

248 return type(self)(self._db, defaults, self._managers) 

249 

250 @property 

251 def dimensions(self) -> DimensionUniverse: 

252 # Docstring inherited from lsst.daf.butler.registry.Registry 

253 return self._managers.dimensions.universe 

254 

255 def refresh(self) -> None: 

256 # Docstring inherited from lsst.daf.butler.registry.Registry 

257 with self._db.transaction(): 

258 self._managers.refresh() 

259 

260 @contextlib.contextmanager 

261 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

262 # Docstring inherited from lsst.daf.butler.registry.Registry 

263 try: 

264 with self._db.transaction(savepoint=savepoint): 

265 yield 

266 except BaseException: 

267 # TODO: this clears the caches sometimes when we wouldn't actually 

268 # need to. Can we avoid that? 

269 self._managers.dimensions.clearCaches() 

270 raise 

271 
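# Usage sketch (illustrative): grouping several registry writes in one
# transaction so they succeed or fail together.  Collection names are
# hypothetical.
def _example_transactional_setup(registry: SqlRegistry) -> None:
    with registry.transaction(savepoint=True):
        registry.registerRun("u/example/run")
        registry.registerCollection("u/example/tagged")  # TAGGED by default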

272 def resetConnectionPool(self) -> None: 

273 """Reset SQLAlchemy connection pool for `SqlRegistry` database. 

274 

275 This operation is useful when using the registry with fork-based 

276 multiprocessing. To use the registry across a fork boundary one has to 

277 make sure that there are no currently active connections (no session or 

278 transaction is in progress) and that the connection pool is reset using 

279 this method. This method should be called by the child process 

280 immediately after the fork. 

281 """ 

282 self._db._engine.dispose() 

283 
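# Usage sketch (illustrative): with fork-based multiprocessing, each child
# process must reset the inherited connection pool before touching the
# registry.
import multiprocessing

def _example_child(registry: SqlRegistry) -> None:
    registry.resetConnectionPool()  # drop connections inherited from the parent
    print(registry.queryCollections())

def _example_fork(registry: SqlRegistry) -> None:
    ctx = multiprocessing.get_context("fork")
    child = ctx.Process(target=_example_child, args=(registry,))
    child.start()
    child.join()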

284 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

285 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

286 other data repository client. 

287 

288 Opaque table records can be added via `insertOpaqueData`, retrieved via 

289 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

290 

291 Parameters 

292 ---------- 

293 tableName : `str` 

294 Logical name of the opaque table. This may differ from the 

295 actual name used in the database by a prefix and/or suffix. 

296 spec : `ddl.TableSpec` 

297 Specification for the table to be added. 

298 """ 

299 self._managers.opaque.register(tableName, spec) 

300 

301 @transactional 

302 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

303 """Insert records into an opaque table. 

304 

305 Parameters 

306 ---------- 

307 tableName : `str` 

308 Logical name of the opaque table. Must match the name used in a 

309 previous call to `registerOpaqueTable`. 

310 data 

311 Each additional positional argument is a dictionary that represents 

312 a single row to be added. 

313 """ 

314 self._managers.opaque[tableName].insert(*data) 

315 

316 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]: 

317 """Retrieve records from an opaque table. 

318 

319 Parameters 

320 ---------- 

321 tableName : `str` 

322 Logical name of the opaque table. Must match the name used in a 

323 previous call to `registerOpaqueTable`. 

324 where 

325 Additional keyword arguments are interpreted as equality 

326 constraints that restrict the returned rows (combined with AND); 

327 keyword arguments are column names and values are the values they 

328 must have. 

329 

330 Yields 

331 ------ 

332 row : `dict` 

333 A dictionary representing a single result row. 

334 """ 

335 yield from self._managers.opaque[tableName].fetch(**where) 

336 

337 @transactional 

338 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

339 """Remove records from an opaque table. 

340 

341 Parameters 

342 ---------- 

343 tableName : `str` 

344 Logical name of the opaque table. Must match the name used in a 

345 previous call to `registerOpaqueTable`. 

346 where 

347 Additional keyword arguments are interpreted as equality 

348 constraints that restrict the deleted rows (combined with AND); 

349 keyword arguments are column names and values are the values they 

350 must have. 

351 """ 

352 self._managers.opaque[tableName].delete(where.keys(), where) 

353 
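# Usage sketch (illustrative): the full life cycle of an opaque table.  The
# table name and columns are hypothetical; real opaque tables are normally
# defined and populated by a ``Datastore`` implementation.
import uuid

def _example_opaque_round_trip(registry: SqlRegistry) -> None:
    spec = ddl.TableSpec(
        fields=[
            ddl.FieldSpec("dataset_id", dtype=ddl.GUID, primaryKey=True),
            ddl.FieldSpec("path", dtype=sqlalchemy.String, length=256, nullable=False),
        ]
    )
    registry.registerOpaqueTable("example_datastore_records", spec)
    registry.insertOpaqueData("example_datastore_records", {"dataset_id": uuid.uuid4(), "path": "a/b.fits"})
    rows = list(registry.fetchOpaqueData("example_datastore_records", path="a/b.fits"))
    assert len(rows) == 1
    registry.deleteOpaqueData("example_datastore_records", path="a/b.fits")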

354 def registerCollection( 

355 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None 

356 ) -> bool: 

357 # Docstring inherited from lsst.daf.butler.registry.Registry 

358 _, registered = self._managers.collections.register(name, type, doc=doc) 

359 return registered 

360 

361 def getCollectionType(self, name: str) -> CollectionType: 

362 # Docstring inherited from lsst.daf.butler.registry.Registry 

363 return self._managers.collections.find(name).type 

364 

365 def _get_collection_record(self, name: str) -> CollectionRecord: 

366 # Docstring inherited from lsst.daf.butler.registry.Registry 

367 return self._managers.collections.find(name) 

368 

369 def registerRun(self, name: str, doc: Optional[str] = None) -> bool: 

370 # Docstring inherited from lsst.daf.butler.registry.Registry 

371 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

372 return registered 

373 

374 @transactional 

375 def removeCollection(self, name: str) -> None: 

376 # Docstring inherited from lsst.daf.butler.registry.Registry 

377 self._managers.collections.remove(name) 

378 

379 def getCollectionChain(self, parent: str) -> tuple[str, ...]: 

380 # Docstring inherited from lsst.daf.butler.registry.Registry 

381 record = self._managers.collections.find(parent) 

382 if record.type is not CollectionType.CHAINED: 

383 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

384 assert isinstance(record, ChainedCollectionRecord) 

385 return record.children 

386 

387 @transactional 

388 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

389 # Docstring inherited from lsst.daf.butler.registry.Registry 

390 record = self._managers.collections.find(parent) 

391 if record.type is not CollectionType.CHAINED: 

392 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

393 assert isinstance(record, ChainedCollectionRecord) 

394 children = CollectionWildcard.from_expression(children).require_ordered() 

395 if children != record.children or flatten: 

396 record.update(self._managers.collections, children, flatten=flatten) 

397 
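# Usage sketch (illustrative): building a CHAINED collection and controlling
# its search order.  Collection names are hypothetical.
def _example_collection_chain(registry: SqlRegistry) -> None:
    registry.registerRun("u/example/run1")
    registry.registerRun("u/example/run2")
    registry.registerCollection("u/example/chain", CollectionType.CHAINED)
    registry.setCollectionChain("u/example/chain", ["u/example/run2", "u/example/run1"])
    assert registry.getCollectionChain("u/example/chain") == ("u/example/run2", "u/example/run1")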

398 def getCollectionParentChains(self, collection: str) -> Set[str]: 

399 # Docstring inherited from lsst.daf.butler.registry.Registry 

400 return { 

401 record.name 

402 for record in self._managers.collections.getParentChains( 

403 self._managers.collections.find(collection).key 

404 ) 

405 } 

406 

407 def getCollectionDocumentation(self, collection: str) -> Optional[str]: 

408 # Docstring inherited from lsst.daf.butler.registry.Registry 

409 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

410 

411 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None: 

412 # Docstring inherited from lsst.daf.butler.registry.Registry 

413 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

414 

415 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

416 # Docstring inherited from lsst.daf.butler.registry.Registry 

417 record = self._managers.collections.find(collection) 

418 return self._managers.datasets.getCollectionSummary(record) 

419 

420 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

421 # Docstring inherited from lsst.daf.butler.registry.Registry 

422 _, inserted = self._managers.datasets.register(datasetType) 

423 return inserted 

424 

425 def removeDatasetType(self, name: str | tuple[str, ...]) -> None: 

426 # Docstring inherited from lsst.daf.butler.registry.Registry 

427 

428 for datasetTypeExpression in ensure_iterable(name): 

429 datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression)) 

430 if not datasetTypes: 

431 _LOG.info("Dataset type %r not defined", datasetTypeExpression) 

432 else: 

433 for datasetType in datasetTypes: 

434 self._managers.datasets.remove(datasetType.name) 

435 _LOG.info("Removed dataset type %r", datasetType.name) 

436 

437 def getDatasetType(self, name: str) -> DatasetType: 

438 # Docstring inherited from lsst.daf.butler.registry.Registry 

439 parent_name, component = DatasetType.splitDatasetTypeName(name) 

440 storage = self._managers.datasets[parent_name] 

441 if component is None: 

442 return storage.datasetType 

443 else: 

444 return storage.datasetType.makeComponentDatasetType(component) 

445 

446 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

447 # Docstring inherited from lsst.daf.butler.registry.Registry 

448 return self._managers.datasets.supportsIdGenerationMode(mode) 

449 

450 def findDataset( 

451 self, 

452 datasetType: Union[DatasetType, str], 

453 dataId: Optional[DataId] = None, 

454 *, 

455 collections: Any = None, 

456 timespan: Optional[Timespan] = None, 

457 **kwargs: Any, 

458 ) -> Optional[DatasetRef]: 

459 # Docstring inherited from lsst.daf.butler.registry.Registry 

460 if collections is None: 

461 if not self.defaults.collections: 

462 raise NoDefaultCollectionError( 

463 "No collections provided to findDataset, and no defaults from registry construction." 

464 ) 

465 collections = self.defaults.collections 

466 backend = queries.SqlQueryBackend(self._db, self._managers) 

467 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True) 

468 matched_collections = backend.resolve_collection_wildcard(collection_wildcard) 

469 parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard( 

470 datasetType, components_deprecated=False 

471 ) 

472 if len(components) > 1: 

473 raise DatasetTypeError( 

474 f"findDataset requires exactly one dataset type; got multiple components {components} " 

475 f"for parent dataset type {parent_dataset_type.name}." 

476 ) 

477 component = components[0] 

478 dataId = DataCoordinate.standardize( 

479 dataId, 

480 graph=parent_dataset_type.dimensions, 

481 universe=self.dimensions, 

482 defaults=self.defaults.dataId, 

483 **kwargs, 

484 ) 

485 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names} 

486 (filtered_collections,) = backend.filter_dataset_collections( 

487 [parent_dataset_type], 

488 matched_collections, 

489 governor_constraints=governor_constraints, 

490 ).values() 

491 if not filtered_collections: 

492 return None 

493 if timespan is None: 

494 filtered_collections = [ 

495 collection_record 

496 for collection_record in filtered_collections 

497 if collection_record.type is not CollectionType.CALIBRATION 

498 ] 

499 if filtered_collections: 

500 requested_columns = {"dataset_id", "run", "collection"} 

501 with backend.context() as context: 

502 predicate = context.make_data_coordinate_predicate( 

503 dataId.subset(parent_dataset_type.dimensions), full=False 

504 ) 

505 if timespan is not None: 

506 requested_columns.add("timespan") 

507 predicate = predicate.logical_and( 

508 context.make_timespan_overlap_predicate( 

509 DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan 

510 ) 

511 ) 

512 relation = backend.make_dataset_query_relation( 

513 parent_dataset_type, filtered_collections, requested_columns, context 

514 ).with_rows_satisfying(predicate) 

515 rows = list(context.fetch_iterable(relation)) 

516 else: 

517 rows = [] 

518 if not rows: 

519 return None 

520 elif len(rows) == 1: 

521 best_row = rows[0] 

522 else: 

523 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)} 

524 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

525 row_iter = iter(rows) 

526 best_row = next(row_iter) 

527 best_rank = rank_by_collection_key[best_row[collection_tag]] 

528 have_tie = False 

529 for row in row_iter: 

530 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank: 

531 best_row = row 

532 best_rank = rank 

533 have_tie = False 

534 elif rank == best_rank: 

535 have_tie = True 

536 assert timespan is not None, "Rank ties should be impossible given DB constraints." 

537 if have_tie: 

538 raise LookupError( 

539 f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections " 

540 f"{collection_wildcard.strings} with timespan {timespan}." 

541 ) 

542 reader = queries.DatasetRefReader( 

543 parent_dataset_type, 

544 translate_collection=lambda k: self._managers.collections[k].name, 

545 ) 

546 ref = reader.read(best_row, data_id=dataId) 

547 if component is not None: 

548 ref = ref.makeComponentRef(component) 

549 return ref 

550 
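# Usage sketch (illustrative): looking up a single calibration dataset.
# Instrument, filter, detector, and collection names are hypothetical; for
# CALIBRATION collections a ``timespan`` is required to select the right
# validity range.
from astropy.time import Time

def _example_find_flat(registry: SqlRegistry) -> Optional[DatasetRef]:
    return registry.findDataset(
        "flat",
        instrument="ExampleCam",
        detector=12,
        physical_filter="Ex-r",
        collections=["ExampleCam/calib"],
        timespan=Timespan(Time("2023-06-01", scale="tai"), Time("2023-06-02", scale="tai")),
    )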

551 @transactional 

552 def insertDatasets( 

553 self, 

554 datasetType: Union[DatasetType, str], 

555 dataIds: Iterable[DataId], 

556 run: Optional[str] = None, 

557 expand: bool = True, 

558 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

559 ) -> List[DatasetRef]: 

560 # Docstring inherited from lsst.daf.butler.registry.Registry 

561 if isinstance(datasetType, DatasetType): 

562 storage = self._managers.datasets.find(datasetType.name) 

563 if storage is None: 

564 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

565 else: 

566 storage = self._managers.datasets.find(datasetType) 

567 if storage is None: 

568 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.") 

569 if run is None: 

570 if self.defaults.run is None: 

571 raise NoDefaultCollectionError( 

572 "No run provided to insertDatasets, and no default from registry construction." 

573 ) 

574 run = self.defaults.run 

575 runRecord = self._managers.collections.find(run) 

576 if runRecord.type is not CollectionType.RUN: 

577 raise CollectionTypeError( 

578 f"Given collection is of type {runRecord.type.name}; RUN collection required." 

579 ) 

580 assert isinstance(runRecord, RunRecord) 

581 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

582 if expand: 

583 expandedDataIds = [ 

584 self.expandDataId(dataId, graph=storage.datasetType.dimensions) 

585 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") 

586 ] 

587 else: 

588 expandedDataIds = [ 

589 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds 

590 ] 

591 try: 

592 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode)) 

593 if self._managers.obscore: 

594 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

595 self._managers.obscore.add_datasets(refs, context) 

596 except sqlalchemy.exc.IntegrityError as err: 

597 raise ConflictingDefinitionError( 

598 "A database constraint failure was triggered by inserting " 

599 f"one or more datasets of type {storage.datasetType} into " 

600 f"collection '{run}'. " 

601 "This probably means a dataset with the same data ID " 

602 "and dataset type already exists, but it may also mean a " 

603 "dimension row is missing." 

604 ) from err 

605 return refs 

606 
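# Usage sketch (illustrative): registering a dataset type and inserting
# datasets into a RUN collection.  All names and data ID values are
# hypothetical, and the corresponding dimension records are assumed to
# already exist; in normal use this happens via ``Butler.put``.
def _example_insert_datasets(registry: SqlRegistry) -> List[DatasetRef]:
    dataset_type = DatasetType(
        "example_catalog",
        dimensions={"instrument", "visit", "detector"},
        storageClass="DataFrame",
        universe=registry.dimensions,
    )
    registry.registerDatasetType(dataset_type)
    registry.registerRun("u/example/run")
    return registry.insertDatasets(
        dataset_type,
        dataIds=[{"instrument": "ExampleCam", "visit": 42, "detector": 12}],
        run="u/example/run",
    )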

607 @transactional 

608 def _importDatasets( 

609 self, 

610 datasets: Iterable[DatasetRef], 

611 expand: bool = True, 

612 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

613 reuseIds: bool = False, 

614 ) -> List[DatasetRef]: 

615 # Docstring inherited from lsst.daf.butler.registry.Registry 

616 datasets = list(datasets) 

617 if not datasets: 

618 # nothing to do 

619 return [] 

620 

621 # find dataset type 

622 datasetTypes = set(dataset.datasetType for dataset in datasets) 

623 if len(datasetTypes) != 1: 

624 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}") 

625 datasetType = datasetTypes.pop() 

626 

627 # get storage handler for this dataset type 

628 storage = self._managers.datasets.find(datasetType.name) 

629 if storage is None: 

630 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

631 

632 # find run name 

633 runs = set(dataset.run for dataset in datasets) 

634 if len(runs) != 1: 

635 raise ValueError(f"Multiple run names in input datasets: {runs}") 

636 run = runs.pop() 

637 if run is None: 

638 if self.defaults.run is None: 

639 raise NoDefaultCollectionError( 

640 "No run provided to ingestDatasets, and no default from registry construction." 

641 ) 

642 run = self.defaults.run 

643 

644 runRecord = self._managers.collections.find(run) 

645 if runRecord.type is not CollectionType.RUN: 

646 raise CollectionTypeError( 

647 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};" 

648 " RUN collection required." 

649 ) 

650 assert isinstance(runRecord, RunRecord) 

651 

652 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

653 if expand: 

654 expandedDatasets = [ 

655 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions)) 

656 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") 

657 ] 

658 else: 

659 expandedDatasets = [ 

660 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True) 

661 for dataset in datasets 

662 ] 

663 

664 try: 

665 refs = list(storage.import_(runRecord, expandedDatasets, idGenerationMode, reuseIds)) 

666 if self._managers.obscore: 

667 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

668 self._managers.obscore.add_datasets(refs, context) 

669 except sqlalchemy.exc.IntegrityError as err: 

670 raise ConflictingDefinitionError( 

671 "A database constraint failure was triggered by inserting " 

672 f"one or more datasets of type {storage.datasetType} into " 

673 f"collection '{run}'. " 

674 "This probably means a dataset with the same data ID " 

675 "and dataset type already exists, but it may also mean a " 

676 "dimension row is missing." 

677 ) from err 

678 return refs 

679 

680 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]: 

681 # Docstring inherited from lsst.daf.butler.registry.Registry 

682 return self._managers.datasets.getDatasetRef(id) 

683 

684 @transactional 

685 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

686 # Docstring inherited from lsst.daf.butler.registry.Registry 

687 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

688 for datasetType, refsForType in progress.iter_item_chunks( 

689 DatasetRef.groupByType(refs).items(), desc="Removing datasets by type" 

690 ): 

691 storage = self._managers.datasets[datasetType.name] 

692 try: 

693 storage.delete(refsForType) 

694 except sqlalchemy.exc.IntegrityError as err: 

695 raise OrphanedRecordError( 

696 "One or more datasets is still present in one or more Datastores." 

697 ) from err 

698 

699 @transactional 

700 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

701 # Docstring inherited from lsst.daf.butler.registry.Registry 

702 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

703 collectionRecord = self._managers.collections.find(collection) 

704 if collectionRecord.type is not CollectionType.TAGGED: 

705 raise CollectionTypeError( 

706 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED." 

707 ) 

708 for datasetType, refsForType in progress.iter_item_chunks( 

709 DatasetRef.groupByType(refs).items(), desc="Associating datasets by type" 

710 ): 

711 storage = self._managers.datasets[datasetType.name] 

712 try: 

713 storage.associate(collectionRecord, refsForType) 

714 if self._managers.obscore: 

715 # If a TAGGED collection is being monitored by the ObsCore 

716 # manager then we may need to save the dataset. 

717 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

718 self._managers.obscore.associate(refsForType, collectionRecord, context) 

719 except sqlalchemy.exc.IntegrityError as err: 

720 raise ConflictingDefinitionError( 

721 f"Constraint violation while associating dataset of type {datasetType.name} with " 

722 f"collection {collection}. This probably means that one or more datasets with the same " 

723 "dataset type and data ID already exist in the collection, but it may also indicate " 

724 "that the datasets do not exist." 

725 ) from err 

726 

727 @transactional 

728 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

729 # Docstring inherited from lsst.daf.butler.registry.Registry 

730 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

731 collectionRecord = self._managers.collections.find(collection) 

732 if collectionRecord.type is not CollectionType.TAGGED: 

733 raise CollectionTypeError( 

734 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED." 

735 ) 

736 for datasetType, refsForType in progress.iter_item_chunks( 

737 DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type" 

738 ): 

739 storage = self._managers.datasets[datasetType.name] 

740 storage.disassociate(collectionRecord, refsForType) 

741 if self._managers.obscore: 

742 self._managers.obscore.disassociate(refsForType, collectionRecord) 

743 

744 @transactional 

745 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

746 # Docstring inherited from lsst.daf.butler.registry.Registry 

747 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

748 collectionRecord = self._managers.collections.find(collection) 

749 for datasetType, refsForType in progress.iter_item_chunks( 

750 DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type" 

751 ): 

752 storage = self._managers.datasets[datasetType.name] 

753 storage.certify( 

754 collectionRecord, 

755 refsForType, 

756 timespan, 

757 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

758 ) 

759 

760 @transactional 

761 def decertify( 

762 self, 

763 collection: str, 

764 datasetType: Union[str, DatasetType], 

765 timespan: Timespan, 

766 *, 

767 dataIds: Optional[Iterable[DataId]] = None, 

768 ) -> None: 

769 # Docstring inherited from lsst.daf.butler.registry.Registry 

770 collectionRecord = self._managers.collections.find(collection) 

771 if isinstance(datasetType, str): 

772 storage = self._managers.datasets[datasetType] 

773 else: 

774 storage = self._managers.datasets[datasetType.name] 

775 standardizedDataIds = None 

776 if dataIds is not None: 

777 standardizedDataIds = [ 

778 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds 

779 ] 

780 storage.decertify( 

781 collectionRecord, 

782 timespan, 

783 dataIds=standardizedDataIds, 

784 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

785 ) 

786 
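# Usage sketch (illustrative): certifying calibration datasets for a validity
# range, then withdrawing part of that range.  Collection name, dataset type,
# and dates are hypothetical.
from astropy.time import Time

def _example_certify(registry: SqlRegistry, flat_refs: List[DatasetRef]) -> None:
    registry.registerCollection("ExampleCam/calib", CollectionType.CALIBRATION)
    validity = Timespan(Time("2023-01-01", scale="tai"), Time("2024-01-01", scale="tai"))
    registry.certify("ExampleCam/calib", flat_refs, validity)
    # Later, withdraw the last month of that range for the same dataset type.
    registry.decertify(
        "ExampleCam/calib",
        "flat",
        Timespan(Time("2023-12-01", scale="tai"), Time("2024-01-01", scale="tai")),
    )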

787 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

788 """Return an object that allows a new `Datastore` instance to 

789 communicate with this `Registry`. 

790 

791 Returns 

792 ------- 

793 manager : `DatastoreRegistryBridgeManager` 

794 Object that mediates communication between this `Registry` and its 

795 associated datastores. 

796 """ 

797 return self._managers.datastores 

798 

799 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

800 # Docstring inherited from lsst.daf.butler.registry.Registry 

801 return self._managers.datastores.findDatastores(ref) 

802 

803 def expandDataId( 

804 self, 

805 dataId: Optional[DataId] = None, 

806 *, 

807 graph: Optional[DimensionGraph] = None, 

808 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None, 

809 withDefaults: bool = True, 

810 **kwargs: Any, 

811 ) -> DataCoordinate: 

812 # Docstring inherited from lsst.daf.butler.registry.Registry 

813 if not withDefaults: 

814 defaults = None 

815 else: 

816 defaults = self.defaults.dataId 

817 try: 

818 standardized = DataCoordinate.standardize( 

819 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs 

820 ) 

821 except KeyError as exc: 

822 # This means that either a kwarg has an unexpected name or a 

823 # required dimension is missing. 

824 raise DimensionNameError(str(exc)) from exc 

825 if standardized.hasRecords(): 

826 return standardized 

827 if records is None: 

828 records = {} 

829 elif isinstance(records, NamedKeyMapping): 

830 records = records.byName() 

831 else: 

832 records = dict(records) 

833 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

834 records.update(dataId.records.byName()) 

835 keys = standardized.byName() 

836 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

837 for element in standardized.graph.primaryKeyTraversalOrder: 

838 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

839 if record is ...: 

840 if isinstance(element, Dimension) and keys.get(element.name) is None: 

841 if element in standardized.graph.required: 

842 raise DimensionNameError( 

843 f"No value or null value for required dimension {element.name}." 

844 ) 

845 keys[element.name] = None 

846 record = None 

847 else: 

848 storage = self._managers.dimensions[element] 

849 record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context) 

850 records[element.name] = record 

851 if record is not None: 

852 for d in element.implied: 

853 value = getattr(record, d.name) 

854 if keys.setdefault(d.name, value) != value: 

855 raise InconsistentDataIdError( 

856 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

857 f"but {element.name} implies {d.name}={value!r}." 

858 ) 

859 else: 

860 if element in standardized.graph.required: 

861 raise DataIdValueError( 

862 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

863 ) 

864 if element.alwaysJoin: 

865 raise InconsistentDataIdError( 

866 f"Could not fetch record for element {element.name} via keys {keys}, ", 

867 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

868 "related.", 

869 ) 

870 for d in element.implied: 

871 keys.setdefault(d.name, None) 

872 records.setdefault(d.name, None) 

873 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records) 

874 
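# Usage sketch (illustrative): expanding a minimal data ID so it carries
# implied dimension values and records.  The instrument and exposure values
# are hypothetical.
def _example_expand_data_id(registry: SqlRegistry) -> DataCoordinate:
    data_id = registry.expandDataId(instrument="ExampleCam", exposure=903342)
    assert data_id.hasRecords()
    return data_id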

875 def insertDimensionData( 

876 self, 

877 element: Union[DimensionElement, str], 

878 *data: Union[Mapping[str, Any], DimensionRecord], 

879 conform: bool = True, 

880 replace: bool = False, 

881 skip_existing: bool = False, 

882 ) -> None: 

883 # Docstring inherited from lsst.daf.butler.registry.Registry 

884 if conform: 

885 if isinstance(element, str): 

886 element = self.dimensions[element] 

887 records = [ 

888 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data 

889 ] 

890 else: 

891 # Ignore typing since caller said to trust them with conform=False. 

892 records = data # type: ignore 

893 storage = self._managers.dimensions[element] 

894 storage.insert(*records, replace=replace, skip_existing=skip_existing) 

895 

896 def syncDimensionData( 

897 self, 

898 element: Union[DimensionElement, str], 

899 row: Union[Mapping[str, Any], DimensionRecord], 

900 conform: bool = True, 

901 update: bool = False, 

902 ) -> Union[bool, Dict[str, Any]]: 

903 # Docstring inherited from lsst.daf.butler.registry.Registry 

904 if conform: 

905 if isinstance(element, str): 

906 element = self.dimensions[element] 

907 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

908 else: 

909 # Ignore typing since caller said to trust them with conform=False. 

910 record = row # type: ignore 

911 storage = self._managers.dimensions[element] 

912 return storage.sync(record, update=update) 

913 
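# Usage sketch (illustrative): inserting dimension records directly.  The
# instrument and filter definitions are hypothetical; ``syncDimensionData``
# is the idempotent variant.
def _example_dimension_records(registry: SqlRegistry) -> None:
    registry.syncDimensionData(
        "instrument",
        {"name": "ExampleCam", "visit_max": 999999, "exposure_max": 999999, "detector_max": 200},
    )
    registry.insertDimensionData(
        "physical_filter",
        {"instrument": "ExampleCam", "name": "Ex-r", "band": "r"},
        skip_existing=True,
    )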

914 def queryDatasetTypes( 

915 self, 

916 expression: Any = ..., 

917 *, 

918 components: Optional[bool] = None, 

919 missing: Optional[List[str]] = None, 

920 ) -> Iterable[DatasetType]: 

921 # Docstring inherited from lsst.daf.butler.registry.Registry 

922 wildcard = DatasetTypeWildcard.from_expression(expression) 

923 composition_dict = self._managers.datasets.resolve_wildcard( 

924 wildcard, 

925 components=components, 

926 missing=missing, 

927 ) 

928 result: list[DatasetType] = [] 

929 for parent_dataset_type, components_for_parent in composition_dict.items(): 

930 result.extend( 

931 parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type 

932 for c in components_for_parent 

933 ) 

934 return result 

935 

936 def queryCollections( 

937 self, 

938 expression: Any = ..., 

939 datasetType: Optional[DatasetType] = None, 

940 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(), 

941 flattenChains: bool = False, 

942 includeChains: Optional[bool] = None, 

943 ) -> Sequence[str]: 

944 # Docstring inherited from lsst.daf.butler.registry.Registry 

945 

946 # Right now the datasetType argument is completely ignored, but that 

947 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

948 # ticket will take care of that. 

949 try: 

950 wildcard = CollectionWildcard.from_expression(expression) 

951 except TypeError as exc: 

952 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc 

953 collectionTypes = ensure_iterable(collectionTypes) 

954 return [ 

955 record.name 

956 for record in self._managers.collections.resolve_wildcard( 

957 wildcard, 

958 collection_types=frozenset(collectionTypes), 

959 flatten_chains=flattenChains, 

960 include_chains=includeChains, 

961 ) 

962 ] 

963 

964 def _makeQueryBuilder( 

965 self, 

966 summary: queries.QuerySummary, 

967 doomed_by: Iterable[str] = (), 

968 ) -> queries.QueryBuilder: 

969 """Return a `QueryBuilder` instance capable of constructing and 

970 managing more complex queries than those obtainable via `Registry` 

971 interfaces. 

972 

973 This is an advanced interface; downstream code should prefer 

974 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

975 are sufficient. 

976 

977 Parameters 

978 ---------- 

979 summary : `queries.QuerySummary` 

980 Object describing and categorizing the full set of dimensions that 

981 will be included in the query. 

982 doomed_by : `Iterable` of `str`, optional 

983 A list of diagnostic messages that indicate why the query is going 

984 to yield no results and should not even be executed. If an empty 

985 container (default) the query will be executed unless other code 

986 determines that it is doomed. 

987 

988 Returns 

989 ------- 

990 builder : `queries.QueryBuilder` 

991 Object that can be used to construct and perform advanced queries. 

992 """ 

993 doomed_by = list(doomed_by) 

994 backend = queries.SqlQueryBackend(self._db, self._managers) 

995 context = backend.context() 

996 relation: Relation | None = None 

997 if doomed_by: 

998 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by) 

999 return queries.QueryBuilder( 

1000 summary, 

1001 backend=backend, 

1002 context=context, 

1003 relation=relation, 

1004 ) 

1005 

1006 def _standardize_query_data_id_args( 

1007 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any 

1008 ) -> DataCoordinate: 

1009 """Preprocess the data ID arguments passed to query* methods. 

1010 

1011 Parameters 

1012 ---------- 

1013 data_id : `DataId` or `None` 

1014 Data ID that constrains the query results. 

1015 doomed_by : `list` [ `str` ] 

1016 List to append messages indicating why the query is doomed to 

1017 yield no results. 

1018 **kwargs 

1019 Additional data ID key-value pairs, extending and overriding 

1020 ``data_id``. 

1021 

1022 Returns 

1023 ------- 

1024 data_id : `DataCoordinate` 

1025 Standardized data ID. Will be fully expanded unless expansion 

1026 fails, in which case a message will be appended to ``doomed_by`` 

1027 on return. 

1028 """ 

1029 try: 

1030 return self.expandDataId(data_id, **kwargs) 

1031 except DataIdValueError as err: 

1032 doomed_by.append(str(err)) 

1033 return DataCoordinate.standardize( 

1034 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId 

1035 ) 

1036 

1037 def _standardize_query_dataset_args( 

1038 self, 

1039 datasets: Any, 

1040 collections: Any, 

1041 components: bool | None, 

1042 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain", 

1043 *, 

1044 doomed_by: list[str], 

1045 ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]: 

1046 """Preprocess dataset arguments passed to query* methods. 

1047 

1048 Parameters 

1049 ---------- 

1050 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these 

1051 Expression identifying dataset types. See `queryDatasetTypes` for 

1052 details. 

1053 collections : `str`, `re.Pattern`, or iterable of these 

1054 Expression identifying collections to be searched. See 

1055 `queryCollections` for details. 

1056 components : `bool`, optional 

1057 If `True`, apply all expression patterns to component dataset type 

1058 names as well. If `False`, never apply patterns to components. 

1059 If `None` (default), apply patterns to components only if their 

1060 parent datasets were not matched by the expression. 

1061 Fully-specified component datasets (`str` or `DatasetType` 

1062 instances) are always included. 

1063 

1064 Values other than `False` are deprecated, and only `False` will be 

1065 supported after v26. After v27 this argument will be removed 

1066 entirely. 

1067 mode : `str`, optional 

1068 The way in which datasets are being used in this query; one of: 

1069 

1070 - "find_first": this is a query for the first dataset in an 

1071 ordered list of collections. Prohibits collection wildcards, 

1072 but permits dataset type wildcards. 

1073 

1074 - "find_all": this is a query for all datasets in all matched 

1075 collections. Permits collection and dataset type wildcards. 

1076 

1077 - "constrain": this is a query for something other than datasets, 

1078 with results constrained by dataset existence. Permits 

1079 collection wildcards and prohibits ``...`` as a dataset type 

1080 wildcard. 

1081 doomed_by : `list` [ `str` ] 

1082 List to append messages indicating why the query is doomed to 

1083 yield no results. 

1084 

1085 Returns 

1086 ------- 

1087 composition : `dict` [ `DatasetType`, `list` [ `str` or `None` ] ] 

1088 Dictionary mapping parent dataset type to `list` of components 

1089 matched for that dataset type (or `None` for the parent itself). 

1090 collections : `CollectionWildcard` or `None` 

1091 Processed collection expression. 

1092 """ 

1093 composition: dict[DatasetType, list[str | None]] = {} 

1094 if datasets is not None: 

1095 if not collections: 

1096 if not self.defaults.collections: 

1097 raise NoDefaultCollectionError("No collections, and no registry default collections.") 

1098 collections = self.defaults.collections 

1099 else: 

1100 collections = CollectionWildcard.from_expression(collections) 

1101 if mode == "find_first" and collections.patterns: 

1102 raise TypeError( 

1103 f"Collection pattern(s) {collections.patterns} not allowed in this context." 

1104 ) 

1105 missing: list[str] = [] 

1106 composition = self._managers.datasets.resolve_wildcard( 

1107 datasets, components=components, missing=missing, explicit_only=(mode == "constrain") 

1108 ) 

1109 if missing and mode == "constrain": 

1110 # After v26 this should raise MissingDatasetTypeError, to be 

1111 # implemented on DM-36303. 

1112 warnings.warn( 

1113 f"Dataset type(s) {missing} are not registered; this will be an error after v26.", 

1114 FutureWarning, 

1115 ) 

1116 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing) 

1117 elif collections: 

1118 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") 

1119 return composition, collections 

1120 

1121 def queryDatasets( 

1122 self, 

1123 datasetType: Any, 

1124 *, 

1125 collections: Any = None, 

1126 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

1127 dataId: Optional[DataId] = None, 

1128 where: str = "", 

1129 findFirst: bool = False, 

1130 components: Optional[bool] = None, 

1131 bind: Optional[Mapping[str, Any]] = None, 

1132 check: bool = True, 

1133 **kwargs: Any, 

1134 ) -> queries.DatasetQueryResults: 

1135 # Docstring inherited from lsst.daf.butler.registry.Registry 

1136 doomed_by: list[str] = [] 

1137 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1138 dataset_composition, collections = self._standardize_query_dataset_args( 

1139 datasetType, 

1140 collections, 

1141 components, 

1142 mode="find_first" if findFirst else "find_all", 

1143 doomed_by=doomed_by, 

1144 ) 

1145 parent_results: list[queries.ParentDatasetQueryResults] = [] 

1146 for parent_dataset_type, components_for_parent in dataset_composition.items(): 

1147 # The full set of dimensions in the query is the combination of 

1148 # those needed for the DatasetType and those explicitly requested, 

1149 # if any. 

1150 dimension_names = set(parent_dataset_type.dimensions.names) 

1151 if dimensions is not None: 

1152 dimension_names.update(self.dimensions.extract(dimensions).names) 

1153 # Construct the summary structure needed to construct a 

1154 # QueryBuilder. 

1155 summary = queries.QuerySummary( 

1156 requested=DimensionGraph(self.dimensions, names=dimension_names), 

1157 column_types=self._managers.column_types, 

1158 data_id=data_id, 

1159 expression=where, 

1160 bind=bind, 

1161 defaults=self.defaults.dataId, 

1162 check=check, 

1163 datasets=[parent_dataset_type], 

1164 ) 

1165 builder = self._makeQueryBuilder(summary) 

1166 # Add the dataset subquery to the query, telling the QueryBuilder 

1167 # to include the rank of the selected collection in the results 

1168 # only if we need to findFirst. Note that if any of the 

1169 # collections are actually wildcard expressions, and 

1170 # findFirst=True, this will raise TypeError for us. 

1171 builder.joinDataset(parent_dataset_type, collections, isResult=True, findFirst=findFirst) 

1172 query = builder.finish() 

1173 parent_results.append( 

1174 queries.ParentDatasetQueryResults( 

1175 query, parent_dataset_type, components=components_for_parent 

1176 ) 

1177 ) 

1178 if not parent_results: 

1179 doomed_by.extend( 

1180 f"No registered dataset type matching {t!r} found, so no matching datasets can " 

1181 "exist in any collection." 

1182 for t in ensure_iterable(datasetType) 

1183 ) 

1184 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

1185 elif len(parent_results) == 1: 

1186 return parent_results[0] 

1187 else: 

1188 return queries.ChainedDatasetQueryResults(parent_results) 

1189 
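# Usage sketch (illustrative): querying datasets with a ``where`` expression
# and bind parameters.  Dataset type, collection, and instrument names are
# hypothetical.
def _example_query_datasets(registry: SqlRegistry) -> list[DatasetRef]:
    results = registry.queryDatasets(
        "example_catalog",
        collections=["u/example/run"],
        where="instrument = instr AND visit > min_visit",
        bind={"instr": "ExampleCam", "min_visit": 40},
        findFirst=True,
    )
    return list(results)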

1190 def queryDataIds( 

1191 self, 

1192 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], 

1193 *, 

1194 dataId: Optional[DataId] = None, 

1195 datasets: Any = None, 

1196 collections: Any = None, 

1197 where: str = "", 

1198 components: Optional[bool] = None, 

1199 bind: Optional[Mapping[str, Any]] = None, 

1200 check: bool = True, 

1201 **kwargs: Any, 

1202 ) -> queries.DataCoordinateQueryResults: 

1203 # Docstring inherited from lsst.daf.butler.registry.Registry 

1204 dimensions = ensure_iterable(dimensions) 

1205 requestedDimensions = self.dimensions.extract(dimensions) 

1206 doomed_by: list[str] = [] 

1207 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1208 dataset_composition, collections = self._standardize_query_dataset_args( 

1209 datasets, collections, components, doomed_by=doomed_by 

1210 ) 

1211 summary = queries.QuerySummary( 

1212 requested=requestedDimensions, 

1213 column_types=self._managers.column_types, 

1214 data_id=data_id, 

1215 expression=where, 

1216 bind=bind, 

1217 defaults=self.defaults.dataId, 

1218 check=check, 

1219 datasets=dataset_composition.keys(), 

1220 ) 

1221 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

1222 for datasetType in dataset_composition.keys(): 

1223 builder.joinDataset(datasetType, collections, isResult=False) 

1224 query = builder.finish() 

1225 

1226 return queries.DataCoordinateQueryResults(query) 

1227 
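# Usage sketch (illustrative): querying data IDs constrained by dataset
# existence, then expanding them so they carry dimension records.  Names are
# hypothetical.
def _example_query_data_ids(registry: SqlRegistry) -> None:
    data_ids = registry.queryDataIds(
        ["visit", "detector"],
        datasets="example_catalog",
        collections=["u/example/run"],
        instrument="ExampleCam",
    )
    for data_id in data_ids.expanded():
        print(data_id)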

1228 def queryDimensionRecords( 

1229 self, 

1230 element: Union[DimensionElement, str], 

1231 *, 

1232 dataId: Optional[DataId] = None, 

1233 datasets: Any = None, 

1234 collections: Any = None, 

1235 where: str = "", 

1236 components: Optional[bool] = None, 

1237 bind: Optional[Mapping[str, Any]] = None, 

1238 check: bool = True, 

1239 **kwargs: Any, 

1240 ) -> queries.DimensionRecordQueryResults: 

1241 # Docstring inherited from lsst.daf.butler.registry.Registry 

1242 if not isinstance(element, DimensionElement): 

1243 try: 

1244 element = self.dimensions[element] 

1245 except KeyError as e: 

1246 raise DimensionNameError( 

1247 f"No such dimension '{element}', available dimensions: " 

1248 + str(self.dimensions.getStaticElements()) 

1249 ) from e 

1250 doomed_by: list[str] = [] 

1251 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1252 dataset_composition, collections = self._standardize_query_dataset_args( 

1253 datasets, collections, components, doomed_by=doomed_by 

1254 ) 

1255 summary = queries.QuerySummary( 

1256 requested=element.graph, 

1257 column_types=self._managers.column_types, 

1258 data_id=data_id, 

1259 expression=where, 

1260 bind=bind, 

1261 defaults=self.defaults.dataId, 

1262 check=check, 

1263 datasets=dataset_composition.keys(), 

1264 ) 

1265 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

1266 for datasetType in dataset_composition.keys(): 

1267 builder.joinDataset(datasetType, collections, isResult=False) 

1268 query = builder.finish().with_record_columns(element) 

1269 return queries.DatabaseDimensionRecordQueryResults(query, element) 

1270 
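# Usage sketch (illustrative): retrieving the dimension records for every
# detector of a (hypothetical) instrument.
def _example_query_detectors(registry: SqlRegistry) -> None:
    for record in registry.queryDimensionRecords("detector", instrument="ExampleCam"):
        print(record.id, record.full_name)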

1271 def queryDatasetAssociations( 

1272 self, 

1273 datasetType: Union[str, DatasetType], 

1274 collections: Any = ..., 

1275 *, 

1276 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1277 flattenChains: bool = False, 

1278 ) -> Iterator[DatasetAssociation]: 

1279 # Docstring inherited from lsst.daf.butler.registry.Registry 

1280 if collections is None: 

1281 if not self.defaults.collections: 

1282 raise NoDefaultCollectionError( 

1283 "No collections provided to queryDatasetAssociations, " 

1284 "and no defaults from registry construction." 

1285 ) 

1286 collections = self.defaults.collections 

1287 collections = CollectionWildcard.from_expression(collections) 

1288 backend = queries.SqlQueryBackend(self._db, self._managers) 

1289 parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False) 

1290 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan") 

1291 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

1292 for parent_collection_record in backend.resolve_collection_wildcard( 

1293 collections, 

1294 collection_types=frozenset(collectionTypes), 

1295 flatten_chains=flattenChains, 

1296 ): 

1297 # Resolve this possibly-chained collection into a list of 

1298 # non-CHAINED collections that actually hold datasets of this 

1299 # type. 

1300 candidate_collection_records = backend.resolve_dataset_collections( 

1301 parent_dataset_type, 

1302 CollectionWildcard.from_names([parent_collection_record.name]), 

1303 allow_calibration_collections=True, 

1304 governor_constraints={}, 

1305 ) 

1306 if not candidate_collection_records: 

1307 continue 

1308 with backend.context() as context: 

1309 relation = backend.make_dataset_query_relation( 

1310 parent_dataset_type, 

1311 candidate_collection_records, 

1312 columns={"dataset_id", "run", "timespan", "collection"}, 

1313 context=context, 

1314 ) 

1315 reader = queries.DatasetRefReader( 

1316 parent_dataset_type, 

1317 translate_collection=lambda k: self._managers.collections[k].name, 

1318 full=False, 

1319 ) 

1320 for row in context.fetch_iterable(relation): 

1321 ref = reader.read(row) 

1322 collection_record = self._managers.collections[row[collection_tag]] 

1323 if collection_record.type is CollectionType.CALIBRATION: 

1324 timespan = row[timespan_tag] 

1325 else: 

1326 # For backwards compatibility and (possibly?) user 

1327 # convenience we continue to define the timespan of a 

1328 # DatasetAssociation row for a non-CALIBRATION 

1329 # collection to be None rather than a fully unbounded 

1330 # timespan. 

1331 timespan = None 

1332 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan) 

1333 
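# Usage sketch (illustrative): listing where a (hypothetical) dataset type
# appears in TAGGED and CALIBRATION collections, including validity ranges
# for the latter.
def _example_associations(registry: SqlRegistry) -> None:
    for association in registry.queryDatasetAssociations(
        "flat",
        collectionTypes={CollectionType.TAGGED, CollectionType.CALIBRATION},
    ):
        print(association.collection, association.ref.dataId, association.timespan)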

1334 @property 

1335 def obsCoreTableManager(self) -> ObsCoreTableManager | None: 

1336 # Docstring inherited from lsst.daf.butler.registry.Registry 

1337 return self._managers.obscore 

1338 

1339 storageClasses: StorageClassFactory 

1340 """All storage classes known to the registry (`StorageClassFactory`). 

1341 """