Coverage for python/lsst/daf/butler/registries/sql.py: 12%

512 statements  

coverage.py v6.5.0, created at 2023-06-06 09:38 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("SqlRegistry",) 

25 

26import contextlib 

27import logging 

28import warnings 

29from typing import ( 

30 TYPE_CHECKING, 

31 Any, 

32 Dict, 

33 Iterable, 

34 Iterator, 

35 List, 

36 Literal, 

37 Mapping, 

38 Optional, 

39 Sequence, 

40 Set, 

41 Union, 

42 cast, 

43) 

44 

45import sqlalchemy 

46from lsst.daf.relation import LeafRelation, Relation 

47from lsst.resources import ResourcePathExpression 

48from lsst.utils.iteration import ensure_iterable 

49 

50from ..core import ( 

51 Config, 

52 DataCoordinate, 

53 DataId, 

54 DatasetAssociation, 

55 DatasetColumnTag, 

56 DatasetId, 

57 DatasetIdFactory, 

58 DatasetIdGenEnum, 

59 DatasetRef, 

60 DatasetType, 

61 Dimension, 

62 DimensionConfig, 

63 DimensionElement, 

64 DimensionGraph, 

65 DimensionRecord, 

66 DimensionUniverse, 

67 NamedKeyMapping, 

68 NameLookupMapping, 

69 Progress, 

70 StorageClassFactory, 

71 Timespan, 

72 ddl, 

73) 

74from ..core.utils import transactional 

75from ..registry import ( 

76 ArgumentError, 

77 CollectionExpressionError, 

78 CollectionSummary, 

79 CollectionType, 

80 CollectionTypeError, 

81 ConflictingDefinitionError, 

82 DataIdValueError, 

83 DatasetTypeError, 

84 DimensionNameError, 

85 InconsistentDataIdError, 

86 NoDefaultCollectionError, 

87 OrphanedRecordError, 

88 Registry, 

89 RegistryConfig, 

90 RegistryConsistencyError, 

91 RegistryDefaults, 

92 queries, 

93) 

94from ..registry.interfaces import ChainedCollectionRecord, RunRecord 

95from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes 

96from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard 

97 

98if TYPE_CHECKING: 

99 from .._butlerConfig import ButlerConfig 

100 from ..registry._registry import CollectionArgType 

101 from ..registry.interfaces import ( 

102 CollectionRecord, 

103 Database, 

104 DatastoreRegistryBridgeManager, 

105 ObsCoreTableManager, 

106 ) 

107 

108 

109_LOG = logging.getLogger(__name__) 

110 

111 

112class SqlRegistry(Registry): 

113 """Registry implementation based on SQLAlchemy. 

114 

115 Parameters 

116 ---------- 

117 database : `Database` 

118 Database instance to store Registry. 

119 defaults : `RegistryDefaults` 

120 Default collection search path and/or output `~CollectionType.RUN` 

121 collection. 

122 managers : `RegistryManagerInstances` 

123 All the managers required for this registry. 

124 """ 

125 

126 defaultConfigFile: Optional[str] = None 

127 """Path to configuration defaults. Accessed within the ``configs`` resource 

128 or relative to a search path. Can be `None` if no defaults are specified.

129 """ 

130 

131 @classmethod 

132 def createFromConfig( 

133 cls, 

134 config: Optional[Union[RegistryConfig, str]] = None, 

135 dimensionConfig: Optional[Union[DimensionConfig, str]] = None, 

136 butlerRoot: Optional[ResourcePathExpression] = None, 

137 ) -> Registry: 

138 """Create registry database and return `SqlRegistry` instance. 

139 

140 This method initializes database contents; the database must be empty

141 prior to calling this method.

142 

143 Parameters 

144 ---------- 

145 config : `RegistryConfig` or `str`, optional 

146 Registry configuration. If missing, the default configuration will

147 be loaded from ``registry.yaml``.

148 dimensionConfig : `DimensionConfig` or `str`, optional

149 Dimensions configuration. If missing, the default configuration

150 will be loaded from ``dimensions.yaml``.

151 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

152 Path to the repository root this `SqlRegistry` will manage. 

153 

154 Returns 

155 ------- 

156 registry : `SqlRegistry` 

157 A new `SqlRegistry` instance. 

158 """ 

159 config = cls.forceRegistryConfig(config) 

160 config.replaceRoot(butlerRoot) 

161 

162 if isinstance(dimensionConfig, str): 

163 dimensionConfig = DimensionConfig(dimensionConfig) 

164 elif dimensionConfig is None: 

165 dimensionConfig = DimensionConfig() 

166 elif not isinstance(dimensionConfig, DimensionConfig): 

167 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

168 

169 DatabaseClass = config.getDatabaseClass() 

170 database = DatabaseClass.fromUri( 

171 config.connectionString, origin=config.get("origin", 0), namespace=config.get("namespace") 

172 ) 

173 managerTypes = RegistryManagerTypes.fromConfig(config) 

174 managers = managerTypes.makeRepo(database, dimensionConfig) 

175 return cls(database, RegistryDefaults(), managers) 

176 
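# Illustrative usage sketch (editor's addition, not part of sql.py): creating a
# brand-new registry from a default configuration. The SQLite URI and butlerRoot
# below are hypothetical values; "db" is assumed to be the connection-string key.
from lsst.daf.butler.registry import RegistryConfig
from lsst.daf.butler.registries.sql import SqlRegistry

config = RegistryConfig()
config["db"] = "sqlite:////tmp/example_repo/gen3.sqlite3"  # hypothetical location
registry = SqlRegistry.createFromConfig(config, butlerRoot="/tmp/example_repo")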

177 @classmethod 

178 def fromConfig( 

179 cls, 

180 config: Union[ButlerConfig, RegistryConfig, Config, str], 

181 butlerRoot: Optional[ResourcePathExpression] = None, 

182 writeable: bool = True, 

183 defaults: Optional[RegistryDefaults] = None, 

184 ) -> Registry: 

185 """Create `Registry` subclass instance from `config`. 

186 

187 Registry database must be initialized prior to calling this method. 

188 

189 Parameters 

190 ---------- 

191 config : `ButlerConfig`, `RegistryConfig`, `Config`, or `str`

192 Registry configuration.

193 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

194 Path to the repository root this `Registry` will manage. 

195 writeable : `bool`, optional 

196 If `True` (default) create a read-write connection to the database. 

197 defaults : `RegistryDefaults`, optional 

198 Default collection search path and/or output `~CollectionType.RUN` 

199 collection. 

200 

201 Returns 

202 ------- 

203 registry : `SqlRegistry` (subclass) 

204 A new `SqlRegistry` subclass instance. 

205 """ 

206 config = cls.forceRegistryConfig(config) 

207 config.replaceRoot(butlerRoot) 

208 DatabaseClass = config.getDatabaseClass() 

209 database = DatabaseClass.fromUri( 

210 config.connectionString, 

211 origin=config.get("origin", 0), 

212 namespace=config.get("namespace"), 

213 writeable=writeable, 

214 ) 

215 managerTypes = RegistryManagerTypes.fromConfig(config) 

216 with database.session(): 

217 managers = managerTypes.loadRepo(database) 

218 if defaults is None: 

219 defaults = RegistryDefaults() 

220 return cls(database, defaults, managers) 

221 

222 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances): 

223 self._db = database 

224 self._managers = managers 

225 self.storageClasses = StorageClassFactory() 

226 # Intentionally invoke property setter to initialize defaults. This 

227 # can only be done after most of the rest of Registry has already been 

228 # initialized, and must be done before the property getter is used. 

229 self.defaults = defaults 

230 # In the future DatasetIdFactory may become configurable and this 

231 # instance will need to be shared with datasets manager. 

232 self.datasetIdFactory = DatasetIdFactory() 

233 

234 def __str__(self) -> str: 

235 return str(self._db) 

236 

237 def __repr__(self) -> str: 

238 return f"SqlRegistry({self._db!r}, {self.dimensions!r})" 

239 

240 def isWriteable(self) -> bool: 

241 # Docstring inherited from lsst.daf.butler.registry.Registry 

242 return self._db.isWriteable() 

243 

244 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry: 

245 # Docstring inherited from lsst.daf.butler.registry.Registry 

246 if defaults is None: 

247 # No need to copy, because `RegistryDefaults` is immutable; we 

248 # effectively copy on write. 

249 defaults = self.defaults 

250 return type(self)(self._db, defaults, self._managers) 

251 

252 @property 

253 def dimensions(self) -> DimensionUniverse: 

254 # Docstring inherited from lsst.daf.butler.registry.Registry 

255 return self._managers.dimensions.universe 

256 

257 def refresh(self) -> None: 

258 # Docstring inherited from lsst.daf.butler.registry.Registry 

259 with self._db.transaction(): 

260 self._managers.refresh() 

261 

262 @contextlib.contextmanager 

263 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

264 # Docstring inherited from lsst.daf.butler.registry.Registry 

265 try: 

266 with self._db.transaction(savepoint=savepoint): 

267 yield 

268 except BaseException: 

269 # TODO: this clears the caches sometimes when we wouldn't actually 

270 # need to. Can we avoid that? 

271 self._managers.dimensions.clearCaches() 

272 raise 

273 
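# Illustrative sketch (editor's addition): grouping registry writes in a single
# transaction so they commit or roll back together. Collection names are
# hypothetical; `registry` is assumed to come from the createFromConfig sketch above.
from lsst.daf.butler.registry import CollectionType

with registry.transaction():
    registry.registerRun("u/example/run1")
    registry.registerCollection("u/example/tagged", CollectionType.TAGGED)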

274 def resetConnectionPool(self) -> None: 

275 """Reset SQLAlchemy connection pool for `SqlRegistry` database. 

276 

277 This operation is useful when using the registry with fork-based

278 multiprocessing. To use the registry across a fork boundary, one must

279 make sure that there are no currently active connections (no session or

280 transaction is in progress) and that the connection pool is reset using

281 this method. It should be called by the child process immediately

282 after the fork.

283 """ 

284 self._db._engine.dispose() 

285 

286 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

287 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

288 other data repository client. 

289 

290 Opaque table records can be added via `insertOpaqueData`, retrieved via 

291 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

292 

293 Parameters 

294 ---------- 

295 tableName : `str` 

296 Logical name of the opaque table. This may differ from the 

297 actual name used in the database by a prefix and/or suffix. 

298 spec : `ddl.TableSpec` 

299 Specification for the table to be added. 

300 """ 

301 self._managers.opaque.register(tableName, spec) 

302 

303 @transactional 

304 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

305 """Insert records into an opaque table. 

306 

307 Parameters 

308 ---------- 

309 tableName : `str` 

310 Logical name of the opaque table. Must match the name used in a 

311 previous call to `registerOpaqueTable`. 

312 data 

313 Each additional positional argument is a dictionary that represents 

314 a single row to be added. 

315 """ 

316 self._managers.opaque[tableName].insert(*data) 

317 

318 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]: 

319 """Retrieve records from an opaque table. 

320 

321 Parameters 

322 ---------- 

323 tableName : `str` 

324 Logical name of the opaque table. Must match the name used in a 

325 previous call to `registerOpaqueTable`. 

326 where 

327 Additional keyword arguments are interpreted as equality 

328 constraints that restrict the returned rows (combined with AND); 

329 keyword arguments are column names and values are the values they 

330 must have. 

331 

332 Yields 

333 ------ 

334 row : `dict` 

335 A dictionary representing a single result row. 

336 """ 

337 yield from self._managers.opaque[tableName].fetch(**where) 

338 

339 @transactional 

340 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

341 """Remove records from an opaque table. 

342 

343 Parameters 

344 ---------- 

345 tableName : `str` 

346 Logical name of the opaque table. Must match the name used in a 

347 previous call to `registerOpaqueTable`. 

348 where 

349 Additional keyword arguments are interpreted as equality 

350 constraints that restrict the deleted rows (combined with AND); 

351 keyword arguments are column names and values are the values they 

352 must have. 

353 """ 

354 self._managers.opaque[tableName].delete(where.keys(), where) 

355 
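# Illustrative sketch (editor's addition): the register/insert/fetch/delete round
# trip for an opaque table. The table name, columns, and values are hypothetical.
import sqlalchemy
from lsst.daf.butler.core import ddl

spec = ddl.TableSpec(
    fields=[
        ddl.FieldSpec("dataset_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
        ddl.FieldSpec("path", dtype=sqlalchemy.String, length=256),
    ]
)
registry.registerOpaqueTable("example_datastore_records", spec)
registry.insertOpaqueData("example_datastore_records", {"dataset_id": 1, "path": "a/b.fits"})
rows = list(registry.fetchOpaqueData("example_datastore_records", dataset_id=1))
registry.deleteOpaqueData("example_datastore_records", dataset_id=1)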

356 def registerCollection( 

357 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None 

358 ) -> bool: 

359 # Docstring inherited from lsst.daf.butler.registry.Registry 

360 _, registered = self._managers.collections.register(name, type, doc=doc) 

361 return registered 

362 

363 def getCollectionType(self, name: str) -> CollectionType: 

364 # Docstring inherited from lsst.daf.butler.registry.Registry 

365 return self._managers.collections.find(name).type 

366 

367 def _get_collection_record(self, name: str) -> CollectionRecord: 

368 # Docstring inherited from lsst.daf.butler.registry.Registry 

369 return self._managers.collections.find(name) 

370 

371 def registerRun(self, name: str, doc: Optional[str] = None) -> bool: 

372 # Docstring inherited from lsst.daf.butler.registry.Registry 

373 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

374 return registered 

375 

376 @transactional 

377 def removeCollection(self, name: str) -> None: 

378 # Docstring inherited from lsst.daf.butler.registry.Registry 

379 self._managers.collections.remove(name) 

380 

381 def getCollectionChain(self, parent: str) -> tuple[str, ...]: 

382 # Docstring inherited from lsst.daf.butler.registry.Registry 

383 record = self._managers.collections.find(parent) 

384 if record.type is not CollectionType.CHAINED: 

385 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

386 assert isinstance(record, ChainedCollectionRecord) 

387 return record.children 

388 

389 @transactional 

390 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

391 # Docstring inherited from lsst.daf.butler.registry.Registry 

392 record = self._managers.collections.find(parent) 

393 if record.type is not CollectionType.CHAINED: 

394 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

395 assert isinstance(record, ChainedCollectionRecord) 

396 children = CollectionWildcard.from_expression(children).require_ordered() 

397 if children != record.children or flatten: 

398 record.update(self._managers.collections, children, flatten=flatten) 

399 
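# Illustrative sketch (editor's addition): building a CHAINED collection whose
# search order is controlled with setCollectionChain. Names are hypothetical and
# `registry` comes from the earlier sketch.
from lsst.daf.butler.registry import CollectionType

registry.registerRun("u/example/run2")
registry.registerCollection("u/example/chain", CollectionType.CHAINED)
registry.setCollectionChain("u/example/chain", ["u/example/run2", "u/example/run1"])
assert registry.getCollectionChain("u/example/chain") == ("u/example/run2", "u/example/run1")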

400 def getCollectionParentChains(self, collection: str) -> Set[str]: 

401 # Docstring inherited from lsst.daf.butler.registry.Registry 

402 return { 

403 record.name 

404 for record in self._managers.collections.getParentChains( 

405 self._managers.collections.find(collection).key 

406 ) 

407 } 

408 

409 def getCollectionDocumentation(self, collection: str) -> Optional[str]: 

410 # Docstring inherited from lsst.daf.butler.registry.Registry 

411 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

412 

413 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None: 

414 # Docstring inherited from lsst.daf.butler.registry.Registry 

415 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

416 

417 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

418 # Docstring inherited from lsst.daf.butler.registry.Registry 

419 record = self._managers.collections.find(collection) 

420 return self._managers.datasets.getCollectionSummary(record) 

421 

422 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

423 # Docstring inherited from lsst.daf.butler.registry.Registry 

424 _, inserted = self._managers.datasets.register(datasetType) 

425 return inserted 

426 
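# Illustrative sketch (editor's addition): defining and registering a dataset
# type. The name, dimensions, and storage class are hypothetical examples drawn
# from the default dimension universe.
from lsst.daf.butler import DatasetType

flat = DatasetType(
    "example_flat",
    dimensions=("instrument", "detector", "physical_filter"),
    storageClass="ExposureF",
    universe=registry.dimensions,
)
registry.registerDatasetType(flat)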

427 def removeDatasetType(self, name: str | tuple[str, ...]) -> None: 

428 # Docstring inherited from lsst.daf.butler.registry.Registry 

429 

430 for datasetTypeExpression in ensure_iterable(name): 

431 datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression)) 

432 if not datasetTypes: 

433 _LOG.info("Dataset type %r not defined", datasetTypeExpression) 

434 else: 

435 for datasetType in datasetTypes: 

436 self._managers.datasets.remove(datasetType.name) 

437 _LOG.info("Removed dataset type %r", datasetType.name) 

438 

439 def getDatasetType(self, name: str) -> DatasetType: 

440 # Docstring inherited from lsst.daf.butler.registry.Registry 

441 parent_name, component = DatasetType.splitDatasetTypeName(name) 

442 storage = self._managers.datasets[parent_name] 

443 if component is None: 

444 return storage.datasetType 

445 else: 

446 return storage.datasetType.makeComponentDatasetType(component) 

447 

448 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

449 # Docstring inherited from lsst.daf.butler.registry.Registry 

450 return self._managers.datasets.supportsIdGenerationMode(mode) 

451 

452 def findDataset( 

453 self, 

454 datasetType: Union[DatasetType, str], 

455 dataId: Optional[DataId] = None, 

456 *, 

457 collections: CollectionArgType | None = None, 

458 timespan: Optional[Timespan] = None, 

459 **kwargs: Any, 

460 ) -> Optional[DatasetRef]: 

461 # Docstring inherited from lsst.daf.butler.registry.Registry 

462 if collections is None: 

463 if not self.defaults.collections: 

464 raise NoDefaultCollectionError( 

465 "No collections provided to findDataset, and no defaults from registry construction." 

466 ) 

467 collections = self.defaults.collections 

468 backend = queries.SqlQueryBackend(self._db, self._managers) 

469 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True) 

470 if collection_wildcard.empty(): 

471 return None 

472 matched_collections = backend.resolve_collection_wildcard(collection_wildcard) 

473 parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard( 

474 datasetType, components_deprecated=False 

475 ) 

476 if len(components) > 1: 

477 raise DatasetTypeError( 

478 f"findDataset requires exactly one dataset type; got multiple components {components} " 

479 f"for parent dataset type {parent_dataset_type.name}." 

480 ) 

481 component = components[0] 

482 dataId = DataCoordinate.standardize( 

483 dataId, 

484 graph=parent_dataset_type.dimensions, 

485 universe=self.dimensions, 

486 defaults=self.defaults.dataId, 

487 **kwargs, 

488 ) 

489 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names} 

490 (filtered_collections,) = backend.filter_dataset_collections( 

491 [parent_dataset_type], 

492 matched_collections, 

493 governor_constraints=governor_constraints, 

494 ).values() 

495 if not filtered_collections: 

496 return None 

497 if timespan is None: 

498 filtered_collections = [ 

499 collection_record 

500 for collection_record in filtered_collections 

501 if collection_record.type is not CollectionType.CALIBRATION 

502 ] 

503 if filtered_collections: 

504 requested_columns = {"dataset_id", "run", "collection"} 

505 with backend.context() as context: 

506 predicate = context.make_data_coordinate_predicate( 

507 dataId.subset(parent_dataset_type.dimensions), full=False 

508 ) 

509 if timespan is not None: 

510 requested_columns.add("timespan") 

511 predicate = predicate.logical_and( 

512 context.make_timespan_overlap_predicate( 

513 DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan 

514 ) 

515 ) 

516 relation = backend.make_dataset_query_relation( 

517 parent_dataset_type, filtered_collections, requested_columns, context 

518 ).with_rows_satisfying(predicate) 

519 rows = list(context.fetch_iterable(relation)) 

520 else: 

521 rows = [] 

522 if not rows: 

523 return None 

524 elif len(rows) == 1: 

525 best_row = rows[0] 

526 else: 

527 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)} 

528 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

529 row_iter = iter(rows) 

530 best_row = next(row_iter) 

531 best_rank = rank_by_collection_key[best_row[collection_tag]] 

532 have_tie = False 

533 for row in row_iter: 

534 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank: 

535 best_row = row 

536 best_rank = rank 

537 have_tie = False 

538 elif rank == best_rank: 

539 have_tie = True 

540 assert timespan is not None, "Rank ties should be impossible given DB constraints." 

541 if have_tie: 

542 raise LookupError( 

543 f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections " 

544 f"{collection_wildcard.strings} with timespan {timespan}." 

545 ) 

546 reader = queries.DatasetRefReader( 

547 parent_dataset_type, 

548 translate_collection=lambda k: self._managers.collections[k].name, 

549 ) 

550 ref = reader.read(best_row, data_id=dataId) 

551 if component is not None: 

552 ref = ref.makeComponentRef(component) 

553 return ref 

554 
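# Illustrative sketch (editor's addition): a plain lookup followed by a
# calibration lookup constrained by a timespan. The dataset type, data ID
# values, and collection names are hypothetical.
import astropy.time
from lsst.daf.butler import Timespan

ref = registry.findDataset(
    "example_flat",
    instrument="HSC", detector=42, physical_filter="HSC-R",
    collections=["u/example/run1"],
)
t1 = astropy.time.Time("2023-01-01T00:00:00", scale="tai")
t2 = astropy.time.Time("2023-01-01T00:01:00", scale="tai")
calib_ref = registry.findDataset(
    "example_flat",
    instrument="HSC", detector=42, physical_filter="HSC-R",
    collections=["u/example/calib"],
    timespan=Timespan(t1, t2),
)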

555 @transactional 

556 def insertDatasets( 

557 self, 

558 datasetType: Union[DatasetType, str], 

559 dataIds: Iterable[DataId], 

560 run: Optional[str] = None, 

561 expand: bool = True, 

562 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

563 ) -> List[DatasetRef]: 

564 # Docstring inherited from lsst.daf.butler.registry.Registry 

565 if isinstance(datasetType, DatasetType): 

566 storage = self._managers.datasets.find(datasetType.name) 

567 if storage is None: 

568 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

569 else: 

570 storage = self._managers.datasets.find(datasetType) 

571 if storage is None: 

572 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.") 

573 if run is None: 

574 if self.defaults.run is None: 

575 raise NoDefaultCollectionError( 

576 "No run provided to insertDatasets, and no default from registry construction." 

577 ) 

578 run = self.defaults.run 

579 runRecord = self._managers.collections.find(run) 

580 if runRecord.type is not CollectionType.RUN: 

581 raise CollectionTypeError( 

582 f"Given collection is of type {runRecord.type.name}; RUN collection required." 

583 ) 

584 assert isinstance(runRecord, RunRecord) 

585 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

586 if expand: 

587 expandedDataIds = [ 

588 self.expandDataId(dataId, graph=storage.datasetType.dimensions) 

589 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") 

590 ] 

591 else: 

592 expandedDataIds = [ 

593 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds 

594 ] 

595 try: 

596 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode)) 

597 if self._managers.obscore: 

598 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

599 self._managers.obscore.add_datasets(refs, context) 

600 except sqlalchemy.exc.IntegrityError as err: 

601 raise ConflictingDefinitionError( 

602 "A database constraint failure was triggered by inserting " 

603 f"one or more datasets of type {storage.datasetType} into " 

604 f"collection '{run}'. " 

605 "This probably means a dataset with the same data ID " 

606 "and dataset type already exists, but it may also mean a " 

607 "dimension row is missing." 

608 ) from err 

609 return refs 

610 
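# Illustrative sketch (editor's addition): inserting dataset entries into a RUN
# collection. The data ID values are hypothetical and the corresponding
# dimension records are assumed to already exist.
refs = registry.insertDatasets(
    "example_flat",
    dataIds=[{"instrument": "HSC", "detector": 42, "physical_filter": "HSC-R"}],
    run="u/example/run1",
)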

611 @transactional 

612 def _importDatasets( 

613 self, 

614 datasets: Iterable[DatasetRef], 

615 expand: bool = True, 

616 ) -> List[DatasetRef]: 

617 # Docstring inherited from lsst.daf.butler.registry.Registry 

618 datasets = list(datasets) 

619 if not datasets: 

620 # nothing to do 

621 return [] 

622 

623 # find dataset type 

624 datasetTypes = set(dataset.datasetType for dataset in datasets) 

625 if len(datasetTypes) != 1: 

626 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}") 

627 datasetType = datasetTypes.pop() 

628 

629 # get storage handler for this dataset type 

630 storage = self._managers.datasets.find(datasetType.name) 

631 if storage is None: 

632 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

633 

634 # find run name 

635 runs = set(dataset.run for dataset in datasets) 

636 if len(runs) != 1: 

637 raise ValueError(f"Multiple run names in input datasets: {runs}") 

638 run = runs.pop() 

639 

640 runRecord = self._managers.collections.find(run) 

641 if runRecord.type is not CollectionType.RUN: 

642 raise CollectionTypeError( 

643 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};" 

644 " RUN collection required." 

645 ) 

646 assert isinstance(runRecord, RunRecord) 

647 

648 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

649 if expand: 

650 expandedDatasets = [ 

651 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions)) 

652 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") 

653 ] 

654 else: 

655 expandedDatasets = [ 

656 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True) 

657 for dataset in datasets 

658 ] 

659 

660 try: 

661 refs = list(storage.import_(runRecord, expandedDatasets)) 

662 if self._managers.obscore: 

663 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

664 self._managers.obscore.add_datasets(refs, context) 

665 except sqlalchemy.exc.IntegrityError as err: 

666 raise ConflictingDefinitionError( 

667 "A database constraint failure was triggered by inserting " 

668 f"one or more datasets of type {storage.datasetType} into " 

669 f"collection '{run}'. " 

670 "This probably means a dataset with the same data ID " 

671 "and dataset type already exists, but it may also mean a " 

672 "dimension row is missing." 

673 ) from err 

674 # Check that imported dataset IDs match the input 

675 for imported_ref, input_ref in zip(refs, datasets): 

676 if imported_ref.id != input_ref.id: 

677 raise RegistryConsistencyError( 

678 "Imported dataset ID differs from input dataset ID, " 

679 f"input ref: {input_ref}, imported ref: {imported_ref}" 

680 ) 

681 return refs 

682 

683 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]: 

684 # Docstring inherited from lsst.daf.butler.registry.Registry 

685 return self._managers.datasets.getDatasetRef(id) 

686 

687 @transactional 

688 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

689 # Docstring inherited from lsst.daf.butler.registry.Registry 

690 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

691 for datasetType, refsForType in progress.iter_item_chunks( 

692 DatasetRef.groupByType(refs).items(), desc="Removing datasets by type" 

693 ): 

694 storage = self._managers.datasets[datasetType.name] 

695 try: 

696 storage.delete(refsForType) 

697 except sqlalchemy.exc.IntegrityError as err: 

698 raise OrphanedRecordError( 

699 "One or more datasets is still present in one or more Datastores." 

700 ) from err 

701 

702 @transactional 

703 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

704 # Docstring inherited from lsst.daf.butler.registry.Registry 

705 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

706 collectionRecord = self._managers.collections.find(collection) 

707 if collectionRecord.type is not CollectionType.TAGGED: 

708 raise CollectionTypeError( 

709 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED." 

710 ) 

711 for datasetType, refsForType in progress.iter_item_chunks( 

712 DatasetRef.groupByType(refs).items(), desc="Associating datasets by type" 

713 ): 

714 storage = self._managers.datasets[datasetType.name] 

715 try: 

716 storage.associate(collectionRecord, refsForType) 

717 if self._managers.obscore: 

718 # If a TAGGED collection is being monitored by ObsCore 

719 # manager then we may need to save the dataset. 

720 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

721 self._managers.obscore.associate(refsForType, collectionRecord, context) 

722 except sqlalchemy.exc.IntegrityError as err: 

723 raise ConflictingDefinitionError( 

724 f"Constraint violation while associating dataset of type {datasetType.name} with " 

725 f"collection {collection}. This probably means that one or more datasets with the same " 

726 "dataset type and data ID already exist in the collection, but it may also indicate " 

727 "that the datasets do not exist." 

728 ) from err 

729 

730 @transactional 

731 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

732 # Docstring inherited from lsst.daf.butler.registry.Registry 

733 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

734 collectionRecord = self._managers.collections.find(collection) 

735 if collectionRecord.type is not CollectionType.TAGGED: 

736 raise CollectionTypeError( 

737 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED." 

738 ) 

739 for datasetType, refsForType in progress.iter_item_chunks( 

740 DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type" 

741 ): 

742 storage = self._managers.datasets[datasetType.name] 

743 storage.disassociate(collectionRecord, refsForType) 

744 if self._managers.obscore: 

745 self._managers.obscore.disassociate(refsForType, collectionRecord) 

746 

747 @transactional 

748 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

749 # Docstring inherited from lsst.daf.butler.registry.Registry 

750 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

751 collectionRecord = self._managers.collections.find(collection) 

752 for datasetType, refsForType in progress.iter_item_chunks( 

753 DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type" 

754 ): 

755 storage = self._managers.datasets[datasetType.name] 

756 storage.certify( 

757 collectionRecord, 

758 refsForType, 

759 timespan, 

760 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

761 ) 

762 

763 @transactional 

764 def decertify( 

765 self, 

766 collection: str, 

767 datasetType: Union[str, DatasetType], 

768 timespan: Timespan, 

769 *, 

770 dataIds: Optional[Iterable[DataId]] = None, 

771 ) -> None: 

772 # Docstring inherited from lsst.daf.butler.registry.Registry 

773 collectionRecord = self._managers.collections.find(collection) 

774 if isinstance(datasetType, str): 

775 storage = self._managers.datasets[datasetType] 

776 else: 

777 storage = self._managers.datasets[datasetType.name] 

778 standardizedDataIds = None 

779 if dataIds is not None: 

780 standardizedDataIds = [ 

781 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds 

782 ] 

783 storage.decertify( 

784 collectionRecord, 

785 timespan, 

786 dataIds=standardizedDataIds, 

787 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

788 ) 

789 

790 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

791 """Return an object that allows a new `Datastore` instance to 

792 communicate with this `Registry`. 

793 

794 Returns 

795 ------- 

796 manager : `DatastoreRegistryBridgeManager` 

797 Object that mediates communication between this `Registry` and its 

798 associated datastores. 

799 """ 

800 return self._managers.datastores 

801 

802 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

803 # Docstring inherited from lsst.daf.butler.registry.Registry 

804 return self._managers.datastores.findDatastores(ref) 

805 

806 def expandDataId( 

807 self, 

808 dataId: Optional[DataId] = None, 

809 *, 

810 graph: Optional[DimensionGraph] = None, 

811 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None, 

812 withDefaults: bool = True, 

813 **kwargs: Any, 

814 ) -> DataCoordinate: 

815 # Docstring inherited from lsst.daf.butler.registry.Registry 

816 if not withDefaults: 

817 defaults = None 

818 else: 

819 defaults = self.defaults.dataId 

820 try: 

821 standardized = DataCoordinate.standardize( 

822 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs 

823 ) 

824 except KeyError as exc: 

825 # This means that either kwargs have some odd name or a required

826 # dimension is missing.

827 raise DimensionNameError(str(exc)) from exc 

828 if standardized.hasRecords(): 

829 return standardized 

830 if records is None: 

831 records = {} 

832 elif isinstance(records, NamedKeyMapping): 

833 records = records.byName() 

834 else: 

835 records = dict(records) 

836 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

837 records.update(dataId.records.byName()) 

838 keys = standardized.byName() 

839 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

840 for element in standardized.graph.primaryKeyTraversalOrder: 

841 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

842 if record is ...: 

843 if isinstance(element, Dimension) and keys.get(element.name) is None: 

844 if element in standardized.graph.required: 

845 raise DimensionNameError( 

846 f"No value or null value for required dimension {element.name}." 

847 ) 

848 keys[element.name] = None 

849 record = None 

850 else: 

851 storage = self._managers.dimensions[element] 

852 record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context) 

853 records[element.name] = record 

854 if record is not None: 

855 for d in element.implied: 

856 value = getattr(record, d.name) 

857 if keys.setdefault(d.name, value) != value: 

858 raise InconsistentDataIdError( 

859 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

860 f"but {element.name} implies {d.name}={value!r}." 

861 ) 

862 else: 

863 if element in standardized.graph.required: 

864 raise DataIdValueError( 

865 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

866 ) 

867 if element.alwaysJoin: 

868 raise InconsistentDataIdError( 

869 f"Could not fetch record for element {element.name} via keys {keys}, ", 

870 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

871 "related.", 

872 ) 

873 for d in element.implied: 

874 keys.setdefault(d.name, None) 

875 records.setdefault(d.name, None) 

876 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records) 

877 
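# Illustrative sketch (editor's addition): expanding a minimal data ID so that
# implied dimension values and their records become available. The values and
# the `full_name` field are hypothetical examples from the default universe.
data_id = registry.expandDataId(instrument="HSC", detector=42)
detector_record = data_id.records["detector"]
print(detector_record.full_name if detector_record is not None else "no record")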

878 def insertDimensionData( 

879 self, 

880 element: Union[DimensionElement, str], 

881 *data: Union[Mapping[str, Any], DimensionRecord], 

882 conform: bool = True, 

883 replace: bool = False, 

884 skip_existing: bool = False, 

885 ) -> None: 

886 # Docstring inherited from lsst.daf.butler.registry.Registry 

887 if conform: 

888 if isinstance(element, str): 

889 element = self.dimensions[element] 

890 records = [ 

891 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data 

892 ] 

893 else: 

894 # Ignore typing since caller said to trust them with conform=False. 

895 records = data # type: ignore 

896 storage = self._managers.dimensions[element] 

897 storage.insert(*records, replace=replace, skip_existing=skip_existing) 

898 
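# Illustrative sketch (editor's addition): inserting a dimension record as a
# plain mapping; the field names are assumed from the default dimension
# universe and the values are hypothetical.
registry.insertDimensionData(
    "instrument",
    {"name": "HSC", "visit_max": 99999999, "exposure_max": 99999999, "detector_max": 200},
)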

899 def syncDimensionData( 

900 self, 

901 element: Union[DimensionElement, str], 

902 row: Union[Mapping[str, Any], DimensionRecord], 

903 conform: bool = True, 

904 update: bool = False, 

905 ) -> Union[bool, Dict[str, Any]]: 

906 # Docstring inherited from lsst.daf.butler.registry.Registry 

907 if conform: 

908 if isinstance(element, str): 

909 element = self.dimensions[element] 

910 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

911 else: 

912 # Ignore typing since caller said to trust them with conform=False. 

913 record = row # type: ignore 

914 storage = self._managers.dimensions[element] 

915 return storage.sync(record, update=update) 

916 

917 def queryDatasetTypes( 

918 self, 

919 expression: Any = ..., 

920 *, 

921 components: Optional[bool] = None, 

922 missing: Optional[List[str]] = None, 

923 ) -> Iterable[DatasetType]: 

924 # Docstring inherited from lsst.daf.butler.registry.Registry 

925 wildcard = DatasetTypeWildcard.from_expression(expression) 

926 composition_dict = self._managers.datasets.resolve_wildcard( 

927 wildcard, 

928 components=components, 

929 missing=missing, 

930 ) 

931 result: list[DatasetType] = [] 

932 for parent_dataset_type, components_for_parent in composition_dict.items(): 

933 result.extend( 

934 parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type 

935 for c in components_for_parent 

936 ) 

937 return result 

938 

939 def queryCollections( 

940 self, 

941 expression: Any = ..., 

942 datasetType: Optional[DatasetType] = None, 

943 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(), 

944 flattenChains: bool = False, 

945 includeChains: Optional[bool] = None, 

946 ) -> Sequence[str]: 

947 # Docstring inherited from lsst.daf.butler.registry.Registry 

948 

949 # Right now the datasetType argument is completely ignored, but that

950 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

951 # ticket will take care of that. 

952 try: 

953 wildcard = CollectionWildcard.from_expression(expression) 

954 except TypeError as exc: 

955 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc 

956 collectionTypes = ensure_iterable(collectionTypes) 

957 return [ 

958 record.name 

959 for record in self._managers.collections.resolve_wildcard( 

960 wildcard, 

961 collection_types=frozenset(collectionTypes), 

962 flatten_chains=flattenChains, 

963 include_chains=includeChains, 

964 ) 

965 ] 

966 

967 def _makeQueryBuilder( 

968 self, 

969 summary: queries.QuerySummary, 

970 doomed_by: Iterable[str] = (), 

971 ) -> queries.QueryBuilder: 

972 """Return a `QueryBuilder` instance capable of constructing and 

973 managing more complex queries than those obtainable via `Registry` 

974 interfaces. 

975 

976 This is an advanced interface; downstream code should prefer 

977 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

978 are sufficient. 

979 

980 Parameters 

981 ---------- 

982 summary : `queries.QuerySummary` 

983 Object describing and categorizing the full set of dimensions that 

984 will be included in the query. 

985 doomed_by : `Iterable` of `str`, optional 

986 A list of diagnostic messages that indicate why the query is going 

987 to yield no results and should not even be executed. If an empty 

988 container (default) the query will be executed unless other code 

989 determines that it is doomed. 

990 

991 Returns 

992 ------- 

993 builder : `queries.QueryBuilder` 

994 Object that can be used to construct and perform advanced queries. 

995 """ 

996 doomed_by = list(doomed_by) 

997 backend = queries.SqlQueryBackend(self._db, self._managers) 

998 context = backend.context() 

999 relation: Relation | None = None 

1000 if doomed_by: 

1001 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by) 

1002 return queries.QueryBuilder( 

1003 summary, 

1004 backend=backend, 

1005 context=context, 

1006 relation=relation, 

1007 ) 

1008 

1009 def _standardize_query_data_id_args( 

1010 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any 

1011 ) -> DataCoordinate: 

1012 """Preprocess the data ID arguments passed to query* methods. 

1013 

1014 Parameters 

1015 ---------- 

1016 data_id : `DataId` or `None` 

1017 Data ID that constrains the query results. 

1018 doomed_by : `list` [ `str` ] 

1019 List to append messages indicating why the query is doomed to 

1020 yield no results. 

1021 **kwargs 

1022 Additional data ID key-value pairs, extending and overriding 

1023 ``data_id``. 

1024 

1025 Returns 

1026 ------- 

1027 data_id : `DataCoordinate` 

1028 Standardized data ID. Will be fully expanded unless expansion 

1029 fails, in which case a message will be appended to ``doomed_by`` 

1030 on return. 

1031 """ 

1032 try: 

1033 return self.expandDataId(data_id, **kwargs) 

1034 except DataIdValueError as err: 

1035 doomed_by.append(str(err)) 

1036 return DataCoordinate.standardize( 

1037 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId 

1038 ) 

1039 

1040 def _standardize_query_dataset_args( 

1041 self, 

1042 datasets: Any, 

1043 collections: CollectionArgType | None, 

1044 components: bool | None, 

1045 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain", 

1046 *, 

1047 doomed_by: list[str], 

1048 ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]: 

1049 """Preprocess dataset arguments passed to query* methods. 

1050 

1051 Parameters 

1052 ---------- 

1053 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these 

1054 Expression identifying dataset types. See `queryDatasetTypes` for 

1055 details. 

1056 collections : `str`, `re.Pattern`, or iterable of these 

1057 Expression identifying collections to be searched. See 

1058 `queryCollections` for details. 

1059 components : `bool`, optional 

1060 If `True`, apply all expression patterns to component dataset type 

1061 names as well. If `False`, never apply patterns to components. 

1062 If `None` (default), apply patterns to components only if their 

1063 parent datasets were not matched by the expression. 

1064 Fully-specified component datasets (`str` or `DatasetType` 

1065 instances) are always included. 

1066 

1067 Values other than `False` are deprecated, and only `False` will be 

1068 supported after v26. After v27 this argument will be removed 

1069 entirely. 

1070 mode : `str`, optional 

1071 The way in which datasets are being used in this query; one of: 

1072 

1073 - "find_first": this is a query for the first dataset in an 

1074 ordered list of collections. Prohibits collection wildcards, 

1075 but permits dataset type wildcards. 

1076 

1077 - "find_all": this is a query for all datasets in all matched 

1078 collections. Permits collection and dataset type wildcards. 

1079 

1080 - "constrain": this is a query for something other than datasets, 

1081 with results constrained by dataset existence. Permits 

1082 collection wildcards and prohibits ``...`` as a dataset type 

1083 wildcard. 

1084 doomed_by : `list` [ `str` ] 

1085 List to append messages indicating why the query is doomed to 

1086 yield no results. 

1087 

1088 Returns 

1089 ------- 

1090 composition : `dict` [ `DatasetType`, `list` [ `str` or `None` ] ]

1091 Dictionary mapping parent dataset type to `list` of components 

1092 matched for that dataset type (or `None` for the parent itself). 

1093 collections : `CollectionWildcard` 

1094 Processed collection expression. 

1095 """ 

1096 composition: dict[DatasetType, list[str | None]] = {} 

1097 collection_wildcard: CollectionWildcard | None = None 

1098 if datasets is not None: 

1099 if collections is None: 

1100 if not self.defaults.collections: 

1101 raise NoDefaultCollectionError("No collections, and no registry default collections.") 

1102 collection_wildcard = CollectionWildcard.from_expression(self.defaults.collections) 

1103 else: 

1104 collection_wildcard = CollectionWildcard.from_expression(collections) 

1105 if mode == "find_first" and collection_wildcard.patterns: 

1106 raise TypeError( 

1107 f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context." 

1108 ) 

1109 missing: list[str] = [] 

1110 composition = self._managers.datasets.resolve_wildcard( 

1111 datasets, components=components, missing=missing, explicit_only=(mode == "constrain") 

1112 ) 

1113 if missing and mode == "constrain": 

1114 # After v26 this should raise MissingDatasetTypeError, to be 

1115 # implemented on DM-36303. 

1116 warnings.warn( 

1117 f"Dataset type(s) {missing} are not registered; this will be an error after v26.", 

1118 FutureWarning, 

1119 ) 

1120 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing) 

1121 elif collections: 

1122 # I think this check should actually be `collections is not None`, 

1123 # but it looks like some CLI scripts use empty tuple as default. 

1124 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") 

1125 return composition, collection_wildcard 

1126 

1127 def queryDatasets( 

1128 self, 

1129 datasetType: Any, 

1130 *, 

1131 collections: CollectionArgType | None = None, 

1132 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

1133 dataId: Optional[DataId] = None, 

1134 where: str = "", 

1135 findFirst: bool = False, 

1136 components: Optional[bool] = None, 

1137 bind: Optional[Mapping[str, Any]] = None, 

1138 check: bool = True, 

1139 **kwargs: Any, 

1140 ) -> queries.DatasetQueryResults: 

1141 # Docstring inherited from lsst.daf.butler.registry.Registry 

1142 doomed_by: list[str] = [] 

1143 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1144 dataset_composition, collection_wildcard = self._standardize_query_dataset_args( 

1145 datasetType, 

1146 collections, 

1147 components, 

1148 mode="find_first" if findFirst else "find_all", 

1149 doomed_by=doomed_by, 

1150 ) 

1151 if collection_wildcard is not None and collection_wildcard.empty(): 

1152 doomed_by.append("No datasets can be found because collection list is empty.") 

1153 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

1154 parent_results: list[queries.ParentDatasetQueryResults] = [] 

1155 for parent_dataset_type, components_for_parent in dataset_composition.items(): 

1156 # The full set of dimensions in the query is the combination of 

1157 # those needed for the DatasetType and those explicitly requested, 

1158 # if any. 

1159 dimension_names = set(parent_dataset_type.dimensions.names) 

1160 if dimensions is not None: 

1161 dimension_names.update(self.dimensions.extract(dimensions).names) 

1162 # Construct the summary structure needed to construct a 

1163 # QueryBuilder. 

1164 summary = queries.QuerySummary( 

1165 requested=DimensionGraph(self.dimensions, names=dimension_names), 

1166 column_types=self._managers.column_types, 

1167 data_id=data_id, 

1168 expression=where, 

1169 bind=bind, 

1170 defaults=self.defaults.dataId, 

1171 check=check, 

1172 datasets=[parent_dataset_type], 

1173 ) 

1174 builder = self._makeQueryBuilder(summary) 

1175 # Add the dataset subquery to the query, telling the QueryBuilder 

1176 # to include the rank of the selected collection in the results 

1177 # only if we need to findFirst. Note that if any of the 

1178 # collections are actually wildcard expressions, and 

1179 # findFirst=True, this will raise TypeError for us. 

1180 builder.joinDataset(parent_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst) 

1181 query = builder.finish() 

1182 parent_results.append( 

1183 queries.ParentDatasetQueryResults( 

1184 query, parent_dataset_type, components=components_for_parent 

1185 ) 

1186 ) 

1187 if not parent_results: 

1188 doomed_by.extend( 

1189 f"No registered dataset type matching {t!r} found, so no matching datasets can " 

1190 "exist in any collection." 

1191 for t in ensure_iterable(datasetType) 

1192 ) 

1193 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

1194 elif len(parent_results) == 1: 

1195 return parent_results[0] 

1196 else: 

1197 return queries.ChainedDatasetQueryResults(parent_results) 

1198 
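# Illustrative sketch (editor's addition): a find-first dataset search through a
# chained collection, constrained by a string `where` expression. Names, values,
# and the range syntax shown are hypothetical examples.
refs = registry.queryDatasets(
    "example_flat",
    collections=["u/example/chain"],
    where="instrument = 'HSC' AND detector IN (40..44)",
    findFirst=True,
)
for ref in refs:
    print(ref.dataId, ref.run)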

1199 def queryDataIds( 

1200 self, 

1201 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], 

1202 *, 

1203 dataId: Optional[DataId] = None, 

1204 datasets: Any = None, 

1205 collections: CollectionArgType | None = None, 

1206 where: str = "", 

1207 components: Optional[bool] = None, 

1208 bind: Optional[Mapping[str, Any]] = None, 

1209 check: bool = True, 

1210 **kwargs: Any, 

1211 ) -> queries.DataCoordinateQueryResults: 

1212 # Docstring inherited from lsst.daf.butler.registry.Registry 

1213 dimensions = ensure_iterable(dimensions) 

1214 requestedDimensions = self.dimensions.extract(dimensions) 

1215 doomed_by: list[str] = [] 

1216 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1217 dataset_composition, collection_wildcard = self._standardize_query_dataset_args( 

1218 datasets, collections, components, doomed_by=doomed_by 

1219 ) 

1220 if collection_wildcard is not None and collection_wildcard.empty(): 

1221 doomed_by.append("No data coordinates can be found because collection list is empty.") 

1222 summary = queries.QuerySummary( 

1223 requested=requestedDimensions, 

1224 column_types=self._managers.column_types, 

1225 data_id=data_id, 

1226 expression=where, 

1227 bind=bind, 

1228 defaults=self.defaults.dataId, 

1229 check=check, 

1230 datasets=dataset_composition.keys(), 

1231 ) 

1232 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

1233 for datasetType in dataset_composition.keys(): 

1234 builder.joinDataset(datasetType, collection_wildcard, isResult=False) 

1235 query = builder.finish() 

1236 

1237 return queries.DataCoordinateQueryResults(query) 

1238 
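# Illustrative sketch (editor's addition): querying data IDs for a set of
# dimensions, constrained by the existence of a dataset in given collections.
# The dataset type, collection, and expression are hypothetical.
data_ids = registry.queryDataIds(
    ["exposure", "detector"],
    datasets="raw",
    collections=["HSC/raw/all"],
    where="exposure.observation_type = 'science'",
)
print(data_ids.count())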

1239 def queryDimensionRecords( 

1240 self, 

1241 element: Union[DimensionElement, str], 

1242 *, 

1243 dataId: Optional[DataId] = None, 

1244 datasets: Any = None, 

1245 collections: CollectionArgType | None = None, 

1246 where: str = "", 

1247 components: Optional[bool] = None, 

1248 bind: Optional[Mapping[str, Any]] = None, 

1249 check: bool = True, 

1250 **kwargs: Any, 

1251 ) -> queries.DimensionRecordQueryResults: 

1252 # Docstring inherited from lsst.daf.butler.registry.Registry 

1253 if not isinstance(element, DimensionElement): 

1254 try: 

1255 element = self.dimensions[element] 

1256 except KeyError as e: 

1257 raise DimensionNameError( 

1258 f"No such dimension '{element}', available dimensions: " 

1259 + str(self.dimensions.getStaticElements()) 

1260 ) from e 

1261 doomed_by: list[str] = [] 

1262 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1263 dataset_composition, collection_wildcard = self._standardize_query_dataset_args( 

1264 datasets, collections, components, doomed_by=doomed_by 

1265 ) 

1266 if collection_wildcard is not None and collection_wildcard.empty(): 

1267 doomed_by.append("No dimension records can be found because collection list is empty.") 

1268 summary = queries.QuerySummary( 

1269 requested=element.graph, 

1270 column_types=self._managers.column_types, 

1271 data_id=data_id, 

1272 expression=where, 

1273 bind=bind, 

1274 defaults=self.defaults.dataId, 

1275 check=check, 

1276 datasets=dataset_composition.keys(), 

1277 ) 

1278 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

1279 for datasetType in dataset_composition.keys(): 

1280 builder.joinDataset(datasetType, collection_wildcard, isResult=False) 

1281 query = builder.finish().with_record_columns(element) 

1282 return queries.DatabaseDimensionRecordQueryResults(query, element) 

1283 

1284 def queryDatasetAssociations( 

1285 self, 

1286 datasetType: Union[str, DatasetType], 

1287 collections: CollectionArgType | None = ..., 

1288 *, 

1289 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1290 flattenChains: bool = False, 

1291 ) -> Iterator[DatasetAssociation]: 

1292 # Docstring inherited from lsst.daf.butler.registry.Registry 

1293 if collections is None: 

1294 if not self.defaults.collections: 

1295 raise NoDefaultCollectionError( 

1296 "No collections provided to queryDatasetAssociations, " 

1297 "and no defaults from registry construction." 

1298 ) 

1299 collections = self.defaults.collections 

1300 collection_wildcard = CollectionWildcard.from_expression(collections) 

1301 backend = queries.SqlQueryBackend(self._db, self._managers) 

1302 parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False) 

1303 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan") 

1304 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

1305 for parent_collection_record in backend.resolve_collection_wildcard( 

1306 collection_wildcard, 

1307 collection_types=frozenset(collectionTypes), 

1308 flatten_chains=flattenChains, 

1309 ): 

1310 # Resolve this possibly-chained collection into a list of 

1311 # non-CHAINED collections that actually hold datasets of this 

1312 # type. 

1313 candidate_collection_records = backend.resolve_dataset_collections( 

1314 parent_dataset_type, 

1315 CollectionWildcard.from_names([parent_collection_record.name]), 

1316 allow_calibration_collections=True, 

1317 governor_constraints={}, 

1318 ) 

1319 if not candidate_collection_records: 

1320 continue 

1321 with backend.context() as context: 

1322 relation = backend.make_dataset_query_relation( 

1323 parent_dataset_type, 

1324 candidate_collection_records, 

1325 columns={"dataset_id", "run", "timespan", "collection"}, 

1326 context=context, 

1327 ) 

1328 reader = queries.DatasetRefReader( 

1329 parent_dataset_type, 

1330 translate_collection=lambda k: self._managers.collections[k].name, 

1331 full=False, 

1332 ) 

1333 for row in context.fetch_iterable(relation): 

1334 ref = reader.read(row) 

1335 collection_record = self._managers.collections[row[collection_tag]] 

1336 if collection_record.type is CollectionType.CALIBRATION: 

1337 timespan = row[timespan_tag] 

1338 else: 

1339 # For backwards compatibility and (possibly?) user 

1340 # convenience we continue to define the timespan of a 

1341 # DatasetAssociation row for a non-CALIBRATION 

1342 # collection to be None rather than a fully unbounded 

1343 # timespan. 

1344 timespan = None 

1345 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan) 

1346 

1347 @property 

1348 def obsCoreTableManager(self) -> ObsCoreTableManager | None: 

1349 # Docstring inherited from lsst.daf.butler.registry.Registry 

1350 return self._managers.obscore 

1351 

1352 storageClasses: StorageClassFactory 

1353 """All storage classes known to the registry (`StorageClassFactory`). 

1354 """