Coverage for python/lsst/daf/butler/registries/sql.py: 13%

503 statements  

coverage.py v6.5.0, created at 2023-04-01 02:05 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("SqlRegistry",) 

25 

26import contextlib 

27import logging 

28import warnings 

29from typing import ( 

30 TYPE_CHECKING, 

31 Any, 

32 Dict, 

33 Iterable, 

34 Iterator, 

35 List, 

36 Literal, 

37 Mapping, 

38 Optional, 

39 Sequence, 

40 Set, 

41 Union, 

42 cast, 

43) 

44 

45import sqlalchemy 

46from lsst.daf.relation import LeafRelation, Relation 

47from lsst.resources import ResourcePathExpression 

48from lsst.utils.iteration import ensure_iterable 

49 

50from ..core import ( 

51 Config, 

52 DataCoordinate, 

53 DataId, 

54 DatasetAssociation, 

55 DatasetColumnTag, 

56 DatasetId, 

57 DatasetRef, 

58 DatasetType, 

59 Dimension, 

60 DimensionConfig, 

61 DimensionElement, 

62 DimensionGraph, 

63 DimensionRecord, 

64 DimensionUniverse, 

65 NamedKeyMapping, 

66 NameLookupMapping, 

67 Progress, 

68 StorageClassFactory, 

69 Timespan, 

70 ddl, 

71) 

72from ..core.utils import transactional 

73from ..registry import ( 

74 ArgumentError, 

75 CollectionExpressionError, 

76 CollectionSummary, 

77 CollectionType, 

78 CollectionTypeError, 

79 ConflictingDefinitionError, 

80 DataIdValueError, 

81 DatasetTypeError, 

82 DimensionNameError, 

83 InconsistentDataIdError, 

84 NoDefaultCollectionError, 

85 OrphanedRecordError, 

86 Registry, 

87 RegistryConfig, 

88 RegistryDefaults, 

89 queries, 

90) 

91from ..registry.interfaces import ChainedCollectionRecord, DatasetIdFactory, DatasetIdGenEnum, RunRecord 

92from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes 

93from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard 

94 

95if TYPE_CHECKING: 

96 from .._butlerConfig import ButlerConfig 

97 from ..registry.interfaces import ( 

98 CollectionRecord, 

99 Database, 

100 DatastoreRegistryBridgeManager, 

101 ObsCoreTableManager, 

102 ) 

103 

104 

105_LOG = logging.getLogger(__name__) 

106 

107 

108class SqlRegistry(Registry): 

109 """Registry implementation based on SQLAlchemy. 

110 

111 Parameters 

112 ---------- 

113 database : `Database` 

114 Database instance in which the Registry data is stored. 

115 defaults : `RegistryDefaults` 

116 Default collection search path and/or output `~CollectionType.RUN` 

117 collection. 

118 managers : `RegistryManagerInstances` 

119 All the managers required for this registry. 

120 """ 

121 

122 defaultConfigFile: Optional[str] = None 

123 """Path to configuration defaults. Accessed within the ``configs`` resource 

124 or relative to a search path. Can be `None` if no defaults are specified. 

125 """ 

126 

127 @classmethod 

128 def createFromConfig( 

129 cls, 

130 config: Optional[Union[RegistryConfig, str]] = None, 

131 dimensionConfig: Optional[Union[DimensionConfig, str]] = None, 

132 butlerRoot: Optional[ResourcePathExpression] = None, 

133 ) -> Registry: 

134 """Create registry database and return `SqlRegistry` instance. 

135 

136 This method initializes database contents; the database must be empty 

137 prior to calling this method. 

138 

139 Parameters 

140 ---------- 

141 config : `RegistryConfig` or `str`, optional 

142 Registry configuration; if missing, the default configuration will 

143 be loaded from registry.yaml. 

144 dimensionConfig : `DimensionConfig` or `str`, optional 

145 Dimensions configuration; if missing, the default configuration 

146 will be loaded from dimensions.yaml. 

147 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

148 Path to the repository root this `SqlRegistry` will manage. 

149 

150 Returns 

151 ------- 

152 registry : `SqlRegistry` 

153 A new `SqlRegistry` instance. 

154 """ 

155 config = cls.forceRegistryConfig(config) 

156 config.replaceRoot(butlerRoot) 

157 

158 if isinstance(dimensionConfig, str): 

159 dimensionConfig = DimensionConfig(dimensionConfig) 

160 elif dimensionConfig is None: 

161 dimensionConfig = DimensionConfig() 

162 elif not isinstance(dimensionConfig, DimensionConfig): 

163 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

164 

165 DatabaseClass = config.getDatabaseClass() 

166 database = DatabaseClass.fromUri( 

167 str(config.connectionString), origin=config.get("origin", 0), namespace=config.get("namespace") 

168 ) 

169 managerTypes = RegistryManagerTypes.fromConfig(config) 

170 managers = managerTypes.makeRepo(database, dimensionConfig) 

171 return cls(database, RegistryDefaults(), managers) 

172 
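# Illustrative sketch of creating a brand-new registry database with
# `createFromConfig`; the configuration path and repository root below are
# hypothetical placeholders.  With ``dimensionConfig=None`` the default
# dimensions.yaml is used, as described above.
#
#     >>> from lsst.daf.butler.registries.sql import SqlRegistry
#     >>> registry = SqlRegistry.createFromConfig(
#     ...     config="registry.yaml",          # hypothetical config file
#     ...     butlerRoot="/path/to/new/repo",  # hypothetical repo root
#     ... )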

173 @classmethod 

174 def fromConfig( 

175 cls, 

176 config: Union[ButlerConfig, RegistryConfig, Config, str], 

177 butlerRoot: Optional[ResourcePathExpression] = None, 

178 writeable: bool = True, 

179 defaults: Optional[RegistryDefaults] = None, 

180 ) -> Registry: 

181 """Create `Registry` subclass instance from `config`. 

182 

183 Registry database must be initialized prior to calling this method. 

184 

185 Parameters 

186 ---------- 

187 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

188 Registry configuration. 

189 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

190 Path to the repository root this `Registry` will manage. 

191 writeable : `bool`, optional 

192 If `True` (default) create a read-write connection to the database. 

193 defaults : `RegistryDefaults`, optional 

194 Default collection search path and/or output `~CollectionType.RUN` 

195 collection. 

196 

197 Returns 

198 ------- 

199 registry : `SqlRegistry` (subclass) 

200 A new `SqlRegistry` subclass instance. 

201 """ 

202 config = cls.forceRegistryConfig(config) 

203 config.replaceRoot(butlerRoot) 

204 DatabaseClass = config.getDatabaseClass() 

205 database = DatabaseClass.fromUri( 

206 config.connectionString.render_as_string(hide_password=False), 

207 origin=config.get("origin", 0), 

208 namespace=config.get("namespace"), 

209 writeable=writeable, 

210 ) 

211 managerTypes = RegistryManagerTypes.fromConfig(config) 

212 with database.session(): 

213 managers = managerTypes.loadRepo(database) 

214 if defaults is None: 

215 defaults = RegistryDefaults() 

216 return cls(database, defaults, managers) 

217 
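# Illustrative sketch of connecting to an existing registry database with
# `fromConfig`, read-only and with a default collection search path; the
# config path and collection name are hypothetical.
#
#     >>> from lsst.daf.butler.registries.sql import SqlRegistry
#     >>> from lsst.daf.butler.registry import RegistryDefaults
#     >>> registry = SqlRegistry.fromConfig(
#     ...     "/path/to/repo/butler.yaml",     # hypothetical config path
#     ...     writeable=False,
#     ...     defaults=RegistryDefaults(collections=["hypothetical/defaults"]),
#     ... )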

218 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances): 

219 self._db = database 

220 self._managers = managers 

221 self.storageClasses = StorageClassFactory() 

222 # Intentionally invoke property setter to initialize defaults. This 

223 # can only be done after most of the rest of Registry has already been 

224 # initialized, and must be done before the property getter is used. 

225 self.defaults = defaults 

226 # In the future DatasetIdFactory may become configurable and this 

227 # instance will need to be shared with datasets manager. 

228 self.datasetIdFactory = DatasetIdFactory() 

229 

230 def __str__(self) -> str: 

231 return str(self._db) 

232 

233 def __repr__(self) -> str: 

234 return f"SqlRegistry({self._db!r}, {self.dimensions!r})" 

235 

236 def isWriteable(self) -> bool: 

237 # Docstring inherited from lsst.daf.butler.registry.Registry 

238 return self._db.isWriteable() 

239 

240 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry: 

241 # Docstring inherited from lsst.daf.butler.registry.Registry 

242 if defaults is None: 

243 # No need to copy, because `RegistryDefaults` is immutable; we 

244 # effectively copy on write. 

245 defaults = self.defaults 

246 return type(self)(self._db, defaults, self._managers) 

247 

248 @property 

249 def dimensions(self) -> DimensionUniverse: 

250 # Docstring inherited from lsst.daf.butler.registry.Registry 

251 return self._managers.dimensions.universe 

252 

253 def refresh(self) -> None: 

254 # Docstring inherited from lsst.daf.butler.registry.Registry 

255 with self._db.transaction(): 

256 self._managers.refresh() 

257 

258 @contextlib.contextmanager 

259 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

260 # Docstring inherited from lsst.daf.butler.registry.Registry 

261 try: 

262 with self._db.transaction(savepoint=savepoint): 

263 yield 

264 except BaseException: 

265 # TODO: this clears the caches sometimes when we wouldn't actually 

266 # need to. Can we avoid that? 

267 self._managers.dimensions.clearCaches() 

268 raise 

269 
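# Illustrative sketch of grouping several registry writes in one transaction
# so they succeed or fail together, given an existing `registry` instance;
# the collection name is hypothetical.
#
#     >>> with registry.transaction(savepoint=True):
#     ...     registry.registerRun("hypothetical/run1")
#     ...     registry.setCollectionDocumentation("hypothetical/run1", "demo run")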

270 def resetConnectionPool(self) -> None: 

271 """Reset SQLAlchemy connection pool for `SqlRegistry` database. 

272 

273 This operation is useful when using the registry with fork-based 

274 multiprocessing. To use the registry across a fork boundary, one has to 

275 make sure that there are no currently active connections (no session or 

276 transaction is in progress) and that the connection pool is reset using 

277 this method. This method should be called by the child process 

278 immediately after the fork. 

279 """ 

280 self._db._engine.dispose() 

281 
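# Illustrative sketch of using the registry with fork-based multiprocessing:
# each child process resets the inherited connection pool before issuing its
# own queries.  The worker function and its inputs are hypothetical.
#
#     >>> import multiprocessing
#     >>> def _init_worker():
#     ...     registry.resetConnectionPool()
#     >>> with multiprocessing.get_context("fork").Pool(
#     ...     processes=4, initializer=_init_worker
#     ... ) as pool:
#     ...     results = pool.map(hypothetical_worker, hypothetical_data_ids)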

282 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

283 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

284 other data repository client. 

285 

286 Opaque table records can be added via `insertOpaqueData`, retrieved via 

287 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

288 

289 Parameters 

290 ---------- 

291 tableName : `str` 

292 Logical name of the opaque table. This may differ from the 

293 actual name used in the database by a prefix and/or suffix. 

294 spec : `ddl.TableSpec` 

295 Specification for the table to be added. 

296 """ 

297 self._managers.opaque.register(tableName, spec) 

298 

299 @transactional 

300 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

301 """Insert records into an opaque table. 

302 

303 Parameters 

304 ---------- 

305 tableName : `str` 

306 Logical name of the opaque table. Must match the name used in a 

307 previous call to `registerOpaqueTable`. 

308 data 

309 Each additional positional argument is a dictionary that represents 

310 a single row to be added. 

311 """ 

312 self._managers.opaque[tableName].insert(*data) 

313 

314 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]: 

315 """Retrieve records from an opaque table. 

316 

317 Parameters 

318 ---------- 

319 tableName : `str` 

320 Logical name of the opaque table. Must match the name used in a 

321 previous call to `registerOpaqueTable`. 

322 where 

323 Additional keyword arguments are interpreted as equality 

324 constraints that restrict the returned rows (combined with AND); 

325 keyword argument names are column names and their values are the 

326 values those columns must have. 

327 

328 Yields 

329 ------ 

330 row : `dict` 

331 A dictionary representing a single result row. 

332 """ 

333 yield from self._managers.opaque[tableName].fetch(**where) 

334 

335 @transactional 

336 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

337 """Remove records from an opaque table. 

338 

339 Parameters 

340 ---------- 

341 tableName : `str` 

342 Logical name of the opaque table. Must match the name used in a 

343 previous call to `registerOpaqueTable`. 

344 where 

345 Additional keyword arguments are interpreted as equality 

346 constraints that restrict the deleted rows (combined with AND); 

347 keyword arguments are column names and values are the values they 

348 must have. 

349 """ 

350 self._managers.opaque[tableName].delete(where.keys(), where) 

351 
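# Illustrative sketch of the opaque-table round trip: register a table,
# insert rows, fetch them back, then delete.  The table name and the exact
# `ddl.TableSpec`/`ddl.FieldSpec` arguments below are assumptions made for
# the sake of the example.
#
#     >>> import sqlalchemy
#     >>> from lsst.daf.butler.core import ddl
#     >>> spec = ddl.TableSpec(
#     ...     fields=[
#     ...         ddl.FieldSpec("path", dtype=sqlalchemy.String, length=256, primaryKey=True),
#     ...         ddl.FieldSpec("checksum", dtype=sqlalchemy.String, length=64),
#     ...     ]
#     ... )
#     >>> registry.registerOpaqueTable("hypothetical_datastore_records", spec)
#     >>> registry.insertOpaqueData(
#     ...     "hypothetical_datastore_records", {"path": "a.fits", "checksum": "abc"}
#     ... )
#     >>> rows = list(registry.fetchOpaqueData("hypothetical_datastore_records", path="a.fits"))
#     >>> registry.deleteOpaqueData("hypothetical_datastore_records", path="a.fits")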

352 def registerCollection( 

353 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None 

354 ) -> bool: 

355 # Docstring inherited from lsst.daf.butler.registry.Registry 

356 _, registered = self._managers.collections.register(name, type, doc=doc) 

357 return registered 

358 

359 def getCollectionType(self, name: str) -> CollectionType: 

360 # Docstring inherited from lsst.daf.butler.registry.Registry 

361 return self._managers.collections.find(name).type 

362 

363 def _get_collection_record(self, name: str) -> CollectionRecord: 

364 # Docstring inherited from lsst.daf.butler.registry.Registry 

365 return self._managers.collections.find(name) 

366 

367 def registerRun(self, name: str, doc: Optional[str] = None) -> bool: 

368 # Docstring inherited from lsst.daf.butler.registry.Registry 

369 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

370 return registered 

371 

372 @transactional 

373 def removeCollection(self, name: str) -> None: 

374 # Docstring inherited from lsst.daf.butler.registry.Registry 

375 self._managers.collections.remove(name) 

376 

377 def getCollectionChain(self, parent: str) -> tuple[str, ...]: 

378 # Docstring inherited from lsst.daf.butler.registry.Registry 

379 record = self._managers.collections.find(parent) 

380 if record.type is not CollectionType.CHAINED: 

381 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

382 assert isinstance(record, ChainedCollectionRecord) 

383 return record.children 

384 

385 @transactional 

386 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

387 # Docstring inherited from lsst.daf.butler.registry.Registry 

388 record = self._managers.collections.find(parent) 

389 if record.type is not CollectionType.CHAINED: 

390 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

391 assert isinstance(record, ChainedCollectionRecord) 

392 children = CollectionWildcard.from_expression(children).require_ordered() 

393 if children != record.children or flatten: 

394 record.update(self._managers.collections, children, flatten=flatten) 

395 
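# Illustrative sketch of building a CHAINED collection whose search order
# spans two hypothetical RUN collections.
#
#     >>> from lsst.daf.butler import CollectionType
#     >>> registry.registerRun("hypothetical/run1")
#     >>> registry.registerRun("hypothetical/run2")
#     >>> registry.registerCollection("hypothetical/chain", CollectionType.CHAINED)
#     >>> registry.setCollectionChain("hypothetical/chain", ["hypothetical/run1", "hypothetical/run2"])
#     >>> registry.getCollectionChain("hypothetical/chain")
#     ('hypothetical/run1', 'hypothetical/run2')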

396 def getCollectionParentChains(self, collection: str) -> Set[str]: 

397 # Docstring inherited from lsst.daf.butler.registry.Registry 

398 return { 

399 record.name 

400 for record in self._managers.collections.getParentChains( 

401 self._managers.collections.find(collection).key 

402 ) 

403 } 

404 

405 def getCollectionDocumentation(self, collection: str) -> Optional[str]: 

406 # Docstring inherited from lsst.daf.butler.registry.Registry 

407 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

408 

409 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None: 

410 # Docstring inherited from lsst.daf.butler.registry.Registry 

411 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

412 

413 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

414 # Docstring inherited from lsst.daf.butler.registry.Registry 

415 record = self._managers.collections.find(collection) 

416 return self._managers.datasets.getCollectionSummary(record) 

417 

418 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

419 # Docstring inherited from lsst.daf.butler.registry.Registry 

420 _, inserted = self._managers.datasets.register(datasetType) 

421 return inserted 

422 

423 def removeDatasetType(self, name: str | tuple[str, ...]) -> None: 

424 # Docstring inherited from lsst.daf.butler.registry.Registry 

425 

426 for datasetTypeExpression in ensure_iterable(name): 

427 datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression)) 

428 if not datasetTypes: 

429 _LOG.info("Dataset type %r not defined", datasetTypeExpression) 

430 else: 

431 for datasetType in datasetTypes: 

432 self._managers.datasets.remove(datasetType.name) 

433 _LOG.info("Removed dataset type %r", datasetType.name) 

434 

435 def getDatasetType(self, name: str) -> DatasetType: 

436 # Docstring inherited from lsst.daf.butler.registry.Registry 

437 parent_name, component = DatasetType.splitDatasetTypeName(name) 

438 storage = self._managers.datasets[parent_name] 

439 if component is None: 

440 return storage.datasetType 

441 else: 

442 return storage.datasetType.makeComponentDatasetType(component) 

443 

444 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

445 # Docstring inherited from lsst.daf.butler.registry.Registry 

446 return self._managers.datasets.supportsIdGenerationMode(mode) 

447 

448 def findDataset( 

449 self, 

450 datasetType: Union[DatasetType, str], 

451 dataId: Optional[DataId] = None, 

452 *, 

453 collections: Any = None, 

454 timespan: Optional[Timespan] = None, 

455 **kwargs: Any, 

456 ) -> Optional[DatasetRef]: 

457 # Docstring inherited from lsst.daf.butler.registry.Registry 

458 if collections is None: 

459 if not self.defaults.collections: 

460 raise NoDefaultCollectionError( 

461 "No collections provided to findDataset, and no defaults from registry construction." 

462 ) 

463 collections = self.defaults.collections 

464 backend = queries.SqlQueryBackend(self._db, self._managers) 

465 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True) 

466 matched_collections = backend.resolve_collection_wildcard(collection_wildcard) 

467 parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard( 

468 datasetType, components_deprecated=False 

469 ) 

470 if len(components) > 1: 

471 raise DatasetTypeError( 

472 f"findDataset requires exactly one dataset type; got multiple components {components} " 

473 f"for parent dataset type {parent_dataset_type.name}." 

474 ) 

475 component = components[0] 

476 dataId = DataCoordinate.standardize( 

477 dataId, 

478 graph=parent_dataset_type.dimensions, 

479 universe=self.dimensions, 

480 defaults=self.defaults.dataId, 

481 **kwargs, 

482 ) 

483 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names} 

484 (filtered_collections,) = backend.filter_dataset_collections( 

485 [parent_dataset_type], 

486 matched_collections, 

487 governor_constraints=governor_constraints, 

488 ).values() 

489 if not filtered_collections: 

490 return None 

491 if timespan is None: 

492 filtered_collections = [ 

493 collection_record 

494 for collection_record in filtered_collections 

495 if collection_record.type is not CollectionType.CALIBRATION 

496 ] 

497 if filtered_collections: 

498 requested_columns = {"dataset_id", "run", "collection"} 

499 with backend.context() as context: 

500 predicate = context.make_data_coordinate_predicate( 

501 dataId.subset(parent_dataset_type.dimensions), full=False 

502 ) 

503 if timespan is not None: 

504 requested_columns.add("timespan") 

505 predicate = predicate.logical_and( 

506 context.make_timespan_overlap_predicate( 

507 DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan 

508 ) 

509 ) 

510 relation = backend.make_dataset_query_relation( 

511 parent_dataset_type, filtered_collections, requested_columns, context 

512 ).with_rows_satisfying(predicate) 

513 rows = list(context.fetch_iterable(relation)) 

514 else: 

515 rows = [] 

516 if not rows: 

517 return None 

518 elif len(rows) == 1: 

519 best_row = rows[0] 

520 else: 

521 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)} 

522 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

523 row_iter = iter(rows) 

524 best_row = next(row_iter) 

525 best_rank = rank_by_collection_key[best_row[collection_tag]] 

526 have_tie = False 

527 for row in row_iter: 

528 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank: 

529 best_row = row 

530 best_rank = rank 

531 have_tie = False 

532 elif rank == best_rank: 

533 have_tie = True 

534 assert timespan is not None, "Rank ties should be impossible given DB constraints." 

535 if have_tie: 

536 raise LookupError( 

537 f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections " 

538 f"{collection_wildcard.strings} with timespan {timespan}." 

539 ) 

540 reader = queries.DatasetRefReader( 

541 parent_dataset_type, 

542 translate_collection=lambda k: self._managers.collections[k].name, 

543 ) 

544 ref = reader.read(best_row, data_id=dataId) 

545 if component is not None: 

546 ref = ref.makeComponentRef(component) 

547 return ref 

548 
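# Illustrative sketch of looking up a single dataset by data ID; the dataset
# type name, data ID keys, and collection below are hypothetical.
#
#     >>> ref = registry.findDataset(
#     ...     "hypothetical_flat",
#     ...     instrument="HypotheticalCam",
#     ...     detector=12,
#     ...     collections=["hypothetical/run1"],
#     ... )
#     >>> ref is None or ref.datasetType.name == "hypothetical_flat"
#     True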

549 @transactional 

550 def insertDatasets( 

551 self, 

552 datasetType: Union[DatasetType, str], 

553 dataIds: Iterable[DataId], 

554 run: Optional[str] = None, 

555 expand: bool = True, 

556 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

557 ) -> List[DatasetRef]: 

558 # Docstring inherited from lsst.daf.butler.registry.Registry 

559 if isinstance(datasetType, DatasetType): 

560 storage = self._managers.datasets.find(datasetType.name) 

561 if storage is None: 

562 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

563 else: 

564 storage = self._managers.datasets.find(datasetType) 

565 if storage is None: 

566 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.") 

567 if run is None: 

568 if self.defaults.run is None: 

569 raise NoDefaultCollectionError( 

570 "No run provided to insertDatasets, and no default from registry construction." 

571 ) 

572 run = self.defaults.run 

573 runRecord = self._managers.collections.find(run) 

574 if runRecord.type is not CollectionType.RUN: 

575 raise CollectionTypeError( 

576 f"Given collection is of type {runRecord.type.name}; RUN collection required." 

577 ) 

578 assert isinstance(runRecord, RunRecord) 

579 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

580 if expand: 

581 expandedDataIds = [ 

582 self.expandDataId(dataId, graph=storage.datasetType.dimensions) 

583 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") 

584 ] 

585 else: 

586 expandedDataIds = [ 

587 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds 

588 ] 

589 try: 

590 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode)) 

591 if self._managers.obscore: 

592 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

593 self._managers.obscore.add_datasets(refs, context) 

594 except sqlalchemy.exc.IntegrityError as err: 

595 raise ConflictingDefinitionError( 

596 "A database constraint failure was triggered by inserting " 

597 f"one or more datasets of type {storage.datasetType} into " 

598 f"collection '{run}'. " 

599 "This probably means a dataset with the same data ID " 

600 "and dataset type already exists, but it may also mean a " 

601 "dimension row is missing." 

602 ) from err 

603 return refs 

604 
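# Illustrative sketch of registering a dataset type and inserting new
# datasets into a RUN collection; all names and dimensions here are
# hypothetical.  With the default ``expand=True`` the referenced dimension
# records must already exist in the registry.
#
#     >>> from lsst.daf.butler import DatasetType
#     >>> dstype = DatasetType(
#     ...     "hypothetical_catalog",
#     ...     dimensions=["instrument", "visit", "detector"],
#     ...     storageClass="StructuredDataDict",
#     ...     universe=registry.dimensions,
#     ... )
#     >>> registry.registerDatasetType(dstype)
#     >>> registry.registerRun("hypothetical/run1")
#     >>> refs = registry.insertDatasets(
#     ...     dstype,
#     ...     dataIds=[{"instrument": "HypotheticalCam", "visit": 42, "detector": 12}],
#     ...     run="hypothetical/run1",
#     ... )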

605 @transactional 

606 def _importDatasets( 

607 self, 

608 datasets: Iterable[DatasetRef], 

609 expand: bool = True, 

610 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

611 reuseIds: bool = False, 

612 ) -> List[DatasetRef]: 

613 # Docstring inherited from lsst.daf.butler.registry.Registry 

614 datasets = list(datasets) 

615 if not datasets: 

616 # nothing to do 

617 return [] 

618 

619 # find dataset type 

620 datasetTypes = set(dataset.datasetType for dataset in datasets) 

621 if len(datasetTypes) != 1: 

622 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}") 

623 datasetType = datasetTypes.pop() 

624 

625 # get storage handler for this dataset type 

626 storage = self._managers.datasets.find(datasetType.name) 

627 if storage is None: 

628 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

629 

630 # find run name 

631 runs = set(dataset.run for dataset in datasets) 

632 if len(runs) != 1: 

633 raise ValueError(f"Multiple run names in input datasets: {runs}") 

634 run = runs.pop() 

635 if run is None: 

636 if self.defaults.run is None: 

637 raise NoDefaultCollectionError( 

638 "No run provided to ingestDatasets, and no default from registry construction." 

639 ) 

640 run = self.defaults.run 

641 

642 runRecord = self._managers.collections.find(run) 

643 if runRecord.type is not CollectionType.RUN: 

644 raise CollectionTypeError( 

645 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};" 

646 " RUN collection required." 

647 ) 

648 assert isinstance(runRecord, RunRecord) 

649 

650 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

651 if expand: 

652 expandedDatasets = [ 

653 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions)) 

654 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") 

655 ] 

656 else: 

657 expandedDatasets = [ 

658 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True) 

659 for dataset in datasets 

660 ] 

661 

662 try: 

663 refs = list(storage.import_(runRecord, expandedDatasets, idGenerationMode, reuseIds)) 

664 if self._managers.obscore: 

665 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

666 self._managers.obscore.add_datasets(refs, context) 

667 except sqlalchemy.exc.IntegrityError as err: 

668 raise ConflictingDefinitionError( 

669 "A database constraint failure was triggered by inserting " 

670 f"one or more datasets of type {storage.datasetType} into " 

671 f"collection '{run}'. " 

672 "This probably means a dataset with the same data ID " 

673 "and dataset type already exists, but it may also mean a " 

674 "dimension row is missing." 

675 ) from err 

676 return refs 

677 

678 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]: 

679 # Docstring inherited from lsst.daf.butler.registry.Registry 

680 return self._managers.datasets.getDatasetRef(id) 

681 

682 @transactional 

683 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

684 # Docstring inherited from lsst.daf.butler.registry.Registry 

685 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

686 for datasetType, refsForType in progress.iter_item_chunks( 

687 DatasetRef.groupByType(refs).items(), desc="Removing datasets by type" 

688 ): 

689 storage = self._managers.datasets[datasetType.name] 

690 try: 

691 storage.delete(refsForType) 

692 except sqlalchemy.exc.IntegrityError as err: 

693 raise OrphanedRecordError( 

694 "One or more datasets is still present in one or more Datastores." 

695 ) from err 

696 

697 @transactional 

698 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

699 # Docstring inherited from lsst.daf.butler.registry.Registry 

700 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

701 collectionRecord = self._managers.collections.find(collection) 

702 if collectionRecord.type is not CollectionType.TAGGED: 

703 raise CollectionTypeError( 

704 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED." 

705 ) 

706 for datasetType, refsForType in progress.iter_item_chunks( 

707 DatasetRef.groupByType(refs).items(), desc="Associating datasets by type" 

708 ): 

709 storage = self._managers.datasets[datasetType.name] 

710 try: 

711 storage.associate(collectionRecord, refsForType) 

712 if self._managers.obscore: 

713 # If a TAGGED collection is being monitored by ObsCore 

714 # manager then we may need to save the dataset. 

715 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

716 self._managers.obscore.associate(refsForType, collectionRecord, context) 

717 except sqlalchemy.exc.IntegrityError as err: 

718 raise ConflictingDefinitionError( 

719 f"Constraint violation while associating dataset of type {datasetType.name} with " 

720 f"collection {collection}. This probably means that one or more datasets with the same " 

721 "dataset type and data ID already exist in the collection, but it may also indicate " 

722 "that the datasets do not exist." 

723 ) from err 

724 

725 @transactional 

726 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

727 # Docstring inherited from lsst.daf.butler.registry.Registry 

728 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

729 collectionRecord = self._managers.collections.find(collection) 

730 if collectionRecord.type is not CollectionType.TAGGED: 

731 raise CollectionTypeError( 

732 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED." 

733 ) 

734 for datasetType, refsForType in progress.iter_item_chunks( 

735 DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type" 

736 ): 

737 storage = self._managers.datasets[datasetType.name] 

738 storage.disassociate(collectionRecord, refsForType) 

739 if self._managers.obscore: 

740 self._managers.obscore.disassociate(refsForType, collectionRecord) 

741 

742 @transactional 

743 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

744 # Docstring inherited from lsst.daf.butler.registry.Registry 

745 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

746 collectionRecord = self._managers.collections.find(collection) 

747 for datasetType, refsForType in progress.iter_item_chunks( 

748 DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type" 

749 ): 

750 storage = self._managers.datasets[datasetType.name] 

751 storage.certify( 

752 collectionRecord, 

753 refsForType, 

754 timespan, 

755 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

756 ) 

757 

758 @transactional 

759 def decertify( 

760 self, 

761 collection: str, 

762 datasetType: Union[str, DatasetType], 

763 timespan: Timespan, 

764 *, 

765 dataIds: Optional[Iterable[DataId]] = None, 

766 ) -> None: 

767 # Docstring inherited from lsst.daf.butler.registry.Registry 

768 collectionRecord = self._managers.collections.find(collection) 

769 if isinstance(datasetType, str): 

770 storage = self._managers.datasets[datasetType] 

771 else: 

772 storage = self._managers.datasets[datasetType.name] 

773 standardizedDataIds = None 

774 if dataIds is not None: 

775 standardizedDataIds = [ 

776 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds 

777 ] 

778 storage.decertify( 

779 collectionRecord, 

780 timespan, 

781 dataIds=standardizedDataIds, 

782 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

783 ) 

784 
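# Illustrative sketch of certifying datasets into a CALIBRATION collection
# for an unbounded validity range, then removing them again; the collection
# name, dataset type name, and `refs` are hypothetical.
#
#     >>> from lsst.daf.butler import CollectionType, Timespan
#     >>> registry.registerCollection("hypothetical/calib", CollectionType.CALIBRATION)
#     >>> registry.certify("hypothetical/calib", refs, Timespan(begin=None, end=None))
#     >>> registry.decertify("hypothetical/calib", "hypothetical_flat", Timespan(begin=None, end=None))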

785 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

786 """Return an object that allows a new `Datastore` instance to 

787 communicate with this `Registry`. 

788 

789 Returns 

790 ------- 

791 manager : `DatastoreRegistryBridgeManager` 

792 Object that mediates communication between this `Registry` and its 

793 associated datastores. 

794 """ 

795 return self._managers.datastores 

796 

797 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

798 # Docstring inherited from lsst.daf.butler.registry.Registry 

799 return self._managers.datastores.findDatastores(ref) 

800 

801 def expandDataId( 

802 self, 

803 dataId: Optional[DataId] = None, 

804 *, 

805 graph: Optional[DimensionGraph] = None, 

806 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None, 

807 withDefaults: bool = True, 

808 **kwargs: Any, 

809 ) -> DataCoordinate: 

810 # Docstring inherited from lsst.daf.butler.registry.Registry 

811 if not withDefaults: 

812 defaults = None 

813 else: 

814 defaults = self.defaults.dataId 

815 try: 

816 standardized = DataCoordinate.standardize( 

817 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs 

818 ) 

819 except KeyError as exc: 

820 # This means either kwargs contain an unrecognized name or a required 

821 # dimension is missing. 

822 raise DimensionNameError(str(exc)) from exc 

823 if standardized.hasRecords(): 

824 return standardized 

825 if records is None: 

826 records = {} 

827 elif isinstance(records, NamedKeyMapping): 

828 records = records.byName() 

829 else: 

830 records = dict(records) 

831 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

832 records.update(dataId.records.byName()) 

833 keys = standardized.byName() 

834 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

835 for element in standardized.graph.primaryKeyTraversalOrder: 

836 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

837 if record is ...: 

838 if isinstance(element, Dimension) and keys.get(element.name) is None: 

839 if element in standardized.graph.required: 

840 raise DimensionNameError( 

841 f"No value or null value for required dimension {element.name}." 

842 ) 

843 keys[element.name] = None 

844 record = None 

845 else: 

846 storage = self._managers.dimensions[element] 

847 record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context) 

848 records[element.name] = record 

849 if record is not None: 

850 for d in element.implied: 

851 value = getattr(record, d.name) 

852 if keys.setdefault(d.name, value) != value: 

853 raise InconsistentDataIdError( 

854 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

855 f"but {element.name} implies {d.name}={value!r}." 

856 ) 

857 else: 

858 if element in standardized.graph.required: 

859 raise DataIdValueError( 

860 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

861 ) 

862 if element.alwaysJoin: 

863 raise InconsistentDataIdError( 

864 f"Could not fetch record for element {element.name} via keys {keys}, ", 

865 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

866 "related.", 

867 ) 

868 for d in element.implied: 

869 keys.setdefault(d.name, None) 

870 records.setdefault(d.name, None) 

871 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records) 

872 
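# Illustrative sketch of expanding a minimal data ID into a fully expanded
# `DataCoordinate` with dimension records attached; the keys, values, and
# the ``full_name`` record field are assumptions for this example.
#
#     >>> data_id = registry.expandDataId(instrument="HypotheticalCam", detector=12)
#     >>> data_id.hasRecords()
#     True
#     >>> data_id.records["detector"].full_name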

873 def insertDimensionData( 

874 self, 

875 element: Union[DimensionElement, str], 

876 *data: Union[Mapping[str, Any], DimensionRecord], 

877 conform: bool = True, 

878 replace: bool = False, 

879 skip_existing: bool = False, 

880 ) -> None: 

881 # Docstring inherited from lsst.daf.butler.registry.Registry 

882 if conform: 

883 if isinstance(element, str): 

884 element = self.dimensions[element] 

885 records = [ 

886 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data 

887 ] 

888 else: 

889 # Ignore typing since caller said to trust them with conform=False. 

890 records = data # type: ignore 

891 storage = self._managers.dimensions[element] 

892 storage.insert(*records, replace=replace, skip_existing=skip_existing) 

893 
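# Illustrative sketch of inserting a dimension record; the record fields
# required for "instrument" depend on the configured dimension universe, so
# the field names and values below are assumptions.
#
#     >>> registry.insertDimensionData(
#     ...     "instrument",
#     ...     {"name": "HypotheticalCam", "class_name": "lsst.obs.hypo.HypoCam",
#     ...      "detector_max": 4, "visit_max": 999999, "exposure_max": 999999},
#     ... )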

894 def syncDimensionData( 

895 self, 

896 element: Union[DimensionElement, str], 

897 row: Union[Mapping[str, Any], DimensionRecord], 

898 conform: bool = True, 

899 update: bool = False, 

900 ) -> Union[bool, Dict[str, Any]]: 

901 # Docstring inherited from lsst.daf.butler.registry.Registry 

902 if conform: 

903 if isinstance(element, str): 

904 element = self.dimensions[element] 

905 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

906 else: 

907 # Ignore typing since caller said to trust them with conform=False. 

908 record = row # type: ignore 

909 storage = self._managers.dimensions[element] 

910 return storage.sync(record, update=update) 

911 

912 def queryDatasetTypes( 

913 self, 

914 expression: Any = ..., 

915 *, 

916 components: Optional[bool] = None, 

917 missing: Optional[List[str]] = None, 

918 ) -> Iterable[DatasetType]: 

919 # Docstring inherited from lsst.daf.butler.registry.Registry 

920 wildcard = DatasetTypeWildcard.from_expression(expression) 

921 composition_dict = self._managers.datasets.resolve_wildcard( 

922 wildcard, 

923 components=components, 

924 missing=missing, 

925 ) 

926 result: list[DatasetType] = [] 

927 for parent_dataset_type, components_for_parent in composition_dict.items(): 

928 result.extend( 

929 parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type 

930 for c in components_for_parent 

931 ) 

932 return result 

933 
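# Illustrative sketch of listing registered dataset types matching a regular
# expression (the pattern itself is hypothetical).
#
#     >>> import re
#     >>> for dataset_type in registry.queryDatasetTypes(re.compile(r"hypothetical_.*")):
#     ...     print(dataset_type.name)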

934 def queryCollections( 

935 self, 

936 expression: Any = ..., 

937 datasetType: Optional[DatasetType] = None, 

938 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(), 

939 flattenChains: bool = False, 

940 includeChains: Optional[bool] = None, 

941 ) -> Sequence[str]: 

942 # Docstring inherited from lsst.daf.butler.registry.Registry 

943 

944 # Right now the datasetType argument is completely ignored, but that 

945 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

946 # ticket will take care of that. 

947 try: 

948 wildcard = CollectionWildcard.from_expression(expression) 

949 except TypeError as exc: 

950 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc 

951 collectionTypes = ensure_iterable(collectionTypes) 

952 return [ 

953 record.name 

954 for record in self._managers.collections.resolve_wildcard( 

955 wildcard, 

956 collection_types=frozenset(collectionTypes), 

957 flatten_chains=flattenChains, 

958 include_chains=includeChains, 

959 ) 

960 ] 

961 
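# Illustrative sketch of listing RUN collections under a hypothetical
# prefix, flattening any chained collections encountered.
#
#     >>> from lsst.daf.butler import CollectionType
#     >>> registry.queryCollections(
#     ...     "hypothetical/*", collectionTypes=CollectionType.RUN, flattenChains=True
#     ... )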

962 def _makeQueryBuilder( 

963 self, 

964 summary: queries.QuerySummary, 

965 doomed_by: Iterable[str] = (), 

966 ) -> queries.QueryBuilder: 

967 """Return a `QueryBuilder` instance capable of constructing and 

968 managing more complex queries than those obtainable via `Registry` 

969 interfaces. 

970 

971 This is an advanced interface; downstream code should prefer 

972 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

973 are sufficient. 

974 

975 Parameters 

976 ---------- 

977 summary : `queries.QuerySummary` 

978 Object describing and categorizing the full set of dimensions that 

979 will be included in the query. 

980 doomed_by : `Iterable` of `str`, optional 

981 A list of diagnostic messages that indicate why the query is going 

982 to yield no results and should not even be executed. If an empty 

983 container (default) the query will be executed unless other code 

984 determines that it is doomed. 

985 

986 Returns 

987 ------- 

988 builder : `queries.QueryBuilder` 

989 Object that can be used to construct and perform advanced queries. 

990 """ 

991 doomed_by = list(doomed_by) 

992 backend = queries.SqlQueryBackend(self._db, self._managers) 

993 context = backend.context() 

994 relation: Relation | None = None 

995 if doomed_by: 

996 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by) 

997 return queries.QueryBuilder( 

998 summary, 

999 backend=backend, 

1000 context=context, 

1001 relation=relation, 

1002 ) 

1003 

1004 def _standardize_query_data_id_args( 

1005 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any 

1006 ) -> DataCoordinate: 

1007 """Preprocess the data ID arguments passed to query* methods. 

1008 

1009 Parameters 

1010 ---------- 

1011 data_id : `DataId` or `None` 

1012 Data ID that constrains the query results. 

1013 doomed_by : `list` [ `str` ] 

1014 List to append messages indicating why the query is doomed to 

1015 yield no results. 

1016 **kwargs 

1017 Additional data ID key-value pairs, extending and overriding 

1018 ``data_id``. 

1019 

1020 Returns 

1021 ------- 

1022 data_id : `DataCoordinate` 

1023 Standardized data ID. Will be fully expanded unless expansion 

1024 fails, in which case a message will be appended to ``doomed_by`` 

1025 on return. 

1026 """ 

1027 try: 

1028 return self.expandDataId(data_id, **kwargs) 

1029 except DataIdValueError as err: 

1030 doomed_by.append(str(err)) 

1031 return DataCoordinate.standardize( 

1032 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId 

1033 ) 

1034 

1035 def _standardize_query_dataset_args( 

1036 self, 

1037 datasets: Any, 

1038 collections: Any, 

1039 components: bool | None, 

1040 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain", 

1041 *, 

1042 doomed_by: list[str], 

1043 ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]: 

1044 """Preprocess dataset arguments passed to query* methods. 

1045 

1046 Parameters 

1047 ---------- 

1048 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these 

1049 Expression identifying dataset types. See `queryDatasetTypes` for 

1050 details. 

1051 collections : `str`, `re.Pattern`, or iterable of these 

1052 Expression identifying collections to be searched. See 

1053 `queryCollections` for details. 

1054 components : `bool`, optional 

1055 If `True`, apply all expression patterns to component dataset type 

1056 names as well. If `False`, never apply patterns to components. 

1057 If `None` (default), apply patterns to components only if their 

1058 parent datasets were not matched by the expression. 

1059 Fully-specified component datasets (`str` or `DatasetType` 

1060 instances) are always included. 

1061 

1062 Values other than `False` are deprecated, and only `False` will be 

1063 supported after v26. After v27 this argument will be removed 

1064 entirely. 

1065 mode : `str`, optional 

1066 The way in which datasets are being used in this query; one of: 

1067 

1068 - "find_first": this is a query for the first dataset in an 

1069 ordered list of collections. Prohibits collection wildcards, 

1070 but permits dataset type wildcards. 

1071 

1072 - "find_all": this is a query for all datasets in all matched 

1073 collections. Permits collection and dataset type wildcards. 

1074 

1075 - "constrain": this is a query for something other than datasets, 

1076 with results constrained by dataset existence. Permits 

1077 collection wildcards and prohibits ``...`` as a dataset type 

1078 wildcard. 

1079 doomed_by : `list` [ `str` ] 

1080 List to append messages indicating why the query is doomed to 

1081 yield no results. 

1082 

1083 Returns 

1084 ------- 

1085 composition : `dict` [ `DatasetType`, `list` [ `str` | `None` ] ] 

1086 Dictionary mapping parent dataset type to `list` of components 

1087 matched for that dataset type (or `None` for the parent itself). 

1088 collections : `CollectionWildcard` 

1089 Processed collection expression. 

1090 """ 

1091 composition: dict[DatasetType, list[str | None]] = {} 

1092 if datasets is not None: 

1093 if not collections: 

1094 if not self.defaults.collections: 

1095 raise NoDefaultCollectionError("No collections, and no registry default collections.") 

1096 collections = self.defaults.collections 

1097 else: 

1098 collections = CollectionWildcard.from_expression(collections) 

1099 if mode == "find_first" and collections.patterns: 

1100 raise TypeError( 

1101 f"Collection pattern(s) {collections.patterns} not allowed in this context." 

1102 ) 

1103 missing: list[str] = [] 

1104 composition = self._managers.datasets.resolve_wildcard( 

1105 datasets, components=components, missing=missing, explicit_only=(mode == "constrain") 

1106 ) 

1107 if missing and mode == "constrain": 

1108 # After v26 this should raise MissingDatasetTypeError, to be 

1109 # implemented on DM-36303. 

1110 warnings.warn( 

1111 f"Dataset type(s) {missing} are not registered; this will be an error after v26.", 

1112 FutureWarning, 

1113 ) 

1114 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing) 

1115 elif collections: 

1116 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") 

1117 return composition, collections 

1118 

1119 def queryDatasets( 

1120 self, 

1121 datasetType: Any, 

1122 *, 

1123 collections: Any = None, 

1124 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

1125 dataId: Optional[DataId] = None, 

1126 where: str = "", 

1127 findFirst: bool = False, 

1128 components: Optional[bool] = None, 

1129 bind: Optional[Mapping[str, Any]] = None, 

1130 check: bool = True, 

1131 **kwargs: Any, 

1132 ) -> queries.DatasetQueryResults: 

1133 # Docstring inherited from lsst.daf.butler.registry.Registry 

1134 doomed_by: list[str] = [] 

1135 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1136 dataset_composition, collections = self._standardize_query_dataset_args( 

1137 datasetType, 

1138 collections, 

1139 components, 

1140 mode="find_first" if findFirst else "find_all", 

1141 doomed_by=doomed_by, 

1142 ) 

1143 parent_results: list[queries.ParentDatasetQueryResults] = [] 

1144 for parent_dataset_type, components_for_parent in dataset_composition.items(): 

1145 # The full set of dimensions in the query is the combination of 

1146 # those needed for the DatasetType and those explicitly requested, 

1147 # if any. 

1148 dimension_names = set(parent_dataset_type.dimensions.names) 

1149 if dimensions is not None: 

1150 dimension_names.update(self.dimensions.extract(dimensions).names) 

1151 # Construct the summary structure needed to construct a 

1152 # QueryBuilder. 

1153 summary = queries.QuerySummary( 

1154 requested=DimensionGraph(self.dimensions, names=dimension_names), 

1155 data_id=data_id, 

1156 expression=where, 

1157 bind=bind, 

1158 defaults=self.defaults.dataId, 

1159 check=check, 

1160 datasets=[parent_dataset_type], 

1161 ) 

1162 builder = self._makeQueryBuilder(summary) 

1163 # Add the dataset subquery to the query, telling the QueryBuilder 

1164 # to include the rank of the selected collection in the results 

1165 # only if we need to findFirst. Note that if any of the 

1166 # collections are actually wildcard expressions, and 

1167 # findFirst=True, this will raise TypeError for us. 

1168 builder.joinDataset(parent_dataset_type, collections, isResult=True, findFirst=findFirst) 

1169 query = builder.finish() 

1170 parent_results.append( 

1171 queries.ParentDatasetQueryResults( 

1172 query, parent_dataset_type, components=components_for_parent 

1173 ) 

1174 ) 

1175 if not parent_results: 

1176 doomed_by.extend( 

1177 f"No registered dataset type matching {t!r} found, so no matching datasets can " 

1178 "exist in any collection." 

1179 for t in ensure_iterable(datasetType) 

1180 ) 

1181 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

1182 elif len(parent_results) == 1: 

1183 return parent_results[0] 

1184 else: 

1185 return queries.ChainedDatasetQueryResults(parent_results) 

1186 
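# Illustrative sketch of a find-first dataset query constrained by a user
# expression; the dataset type, collections, and `where` string are
# hypothetical.
#
#     >>> refs = registry.queryDatasets(
#     ...     "hypothetical_catalog",
#     ...     collections=["hypothetical/run1", "hypothetical/run2"],
#     ...     where="instrument = 'HypotheticalCam' AND detector = 12",
#     ...     findFirst=True,
#     ... )
#     >>> for ref in refs:
#     ...     print(ref.dataId, ref.run)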

1187 def queryDataIds( 

1188 self, 

1189 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], 

1190 *, 

1191 dataId: Optional[DataId] = None, 

1192 datasets: Any = None, 

1193 collections: Any = None, 

1194 where: str = "", 

1195 components: Optional[bool] = None, 

1196 bind: Optional[Mapping[str, Any]] = None, 

1197 check: bool = True, 

1198 **kwargs: Any, 

1199 ) -> queries.DataCoordinateQueryResults: 

1200 # Docstring inherited from lsst.daf.butler.registry.Registry 

1201 dimensions = ensure_iterable(dimensions) 

1202 requestedDimensions = self.dimensions.extract(dimensions) 

1203 doomed_by: list[str] = [] 

1204 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1205 dataset_composition, collections = self._standardize_query_dataset_args( 

1206 datasets, collections, components, doomed_by=doomed_by 

1207 ) 

1208 summary = queries.QuerySummary( 

1209 requested=requestedDimensions, 

1210 data_id=data_id, 

1211 expression=where, 

1212 bind=bind, 

1213 defaults=self.defaults.dataId, 

1214 check=check, 

1215 datasets=dataset_composition.keys(), 

1216 ) 

1217 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

1218 for datasetType in dataset_composition.keys(): 

1219 builder.joinDataset(datasetType, collections, isResult=False) 

1220 query = builder.finish() 

1221 

1222 return queries.DataCoordinateQueryResults(query) 

1223 
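# Illustrative sketch of querying data IDs for a set of dimensions,
# constrained by the existence of a hypothetical dataset type in a
# hypothetical collection.
#
#     >>> data_ids = registry.queryDataIds(
#     ...     ["visit", "detector"],
#     ...     datasets="hypothetical_catalog",
#     ...     collections=["hypothetical/run1"],
#     ...     instrument="HypotheticalCam",
#     ... )
#     >>> data_ids.count()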

1224 def queryDimensionRecords( 

1225 self, 

1226 element: Union[DimensionElement, str], 

1227 *, 

1228 dataId: Optional[DataId] = None, 

1229 datasets: Any = None, 

1230 collections: Any = None, 

1231 where: str = "", 

1232 components: Optional[bool] = None, 

1233 bind: Optional[Mapping[str, Any]] = None, 

1234 check: bool = True, 

1235 **kwargs: Any, 

1236 ) -> queries.DimensionRecordQueryResults: 

1237 # Docstring inherited from lsst.daf.butler.registry.Registry 

1238 if not isinstance(element, DimensionElement): 

1239 try: 

1240 element = self.dimensions[element] 

1241 except KeyError as e: 

1242 raise DimensionNameError( 

1243 f"No such dimension '{element}', available dimensions: " 

1244 + str(self.dimensions.getStaticElements()) 

1245 ) from e 

1246 doomed_by: list[str] = [] 

1247 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1248 dataset_composition, collections = self._standardize_query_dataset_args( 

1249 datasets, collections, components, doomed_by=doomed_by 

1250 ) 

1251 summary = queries.QuerySummary( 

1252 requested=element.graph, 

1253 data_id=data_id, 

1254 expression=where, 

1255 bind=bind, 

1256 defaults=self.defaults.dataId, 

1257 check=check, 

1258 datasets=dataset_composition.keys(), 

1259 ) 

1260 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

1261 for datasetType in dataset_composition.keys(): 

1262 builder.joinDataset(datasetType, collections, isResult=False) 

1263 query = builder.finish().with_record_columns(element) 

1264 return queries.DatabaseDimensionRecordQueryResults(query, element) 

1265 
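# Illustrative sketch of fetching dimension records for one element,
# restricted to a hypothetical instrument; the record fields printed are
# assumed from the default dimension universe.
#
#     >>> for record in registry.queryDimensionRecords("detector", instrument="HypotheticalCam"):
#     ...     print(record.id, record.full_name)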

1266 def queryDatasetAssociations( 

1267 self, 

1268 datasetType: Union[str, DatasetType], 

1269 collections: Any = ..., 

1270 *, 

1271 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1272 flattenChains: bool = False, 

1273 ) -> Iterator[DatasetAssociation]: 

1274 # Docstring inherited from lsst.daf.butler.registry.Registry 

1275 if collections is None: 

1276 if not self.defaults.collections: 

1277 raise NoDefaultCollectionError( 

1278 "No collections provided to queryDatasetAssociations, " 

1279 "and no defaults from registry construction." 

1280 ) 

1281 collections = self.defaults.collections 

1282 collections = CollectionWildcard.from_expression(collections) 

1283 backend = queries.SqlQueryBackend(self._db, self._managers) 

1284 parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False) 

1285 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan") 

1286 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

1287 for parent_collection_record in backend.resolve_collection_wildcard( 

1288 collections, 

1289 collection_types=frozenset(collectionTypes), 

1290 flatten_chains=flattenChains, 

1291 ): 

1292 # Resolve this possibly-chained collection into a list of 

1293 # non-CHAINED collections that actually hold datasets of this 

1294 # type. 

1295 candidate_collection_records = backend.resolve_dataset_collections( 

1296 parent_dataset_type, 

1297 CollectionWildcard.from_names([parent_collection_record.name]), 

1298 allow_calibration_collections=True, 

1299 governor_constraints={}, 

1300 ) 

1301 if not candidate_collection_records: 

1302 continue 

1303 with backend.context() as context: 

1304 relation = backend.make_dataset_query_relation( 

1305 parent_dataset_type, 

1306 candidate_collection_records, 

1307 columns={"dataset_id", "run", "timespan", "collection"}, 

1308 context=context, 

1309 ) 

1310 reader = queries.DatasetRefReader( 

1311 parent_dataset_type, 

1312 translate_collection=lambda k: self._managers.collections[k].name, 

1313 full=False, 

1314 ) 

1315 for row in context.fetch_iterable(relation): 

1316 ref = reader.read(row) 

1317 collection_record = self._managers.collections[row[collection_tag]] 

1318 if collection_record.type is CollectionType.CALIBRATION: 

1319 timespan = row[timespan_tag] 

1320 else: 

1321 # For backwards compatibility and (possibly?) user 

1322 # convenience we continue to define the timespan of a 

1323 # DatasetAssociation row for a non-CALIBRATION 

1324 # collection to be None rather than a fully unbounded 

1325 # timespan. 

1326 timespan = None 

1327 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan) 

1328 

1329 @property 

1330 def obsCoreTableManager(self) -> ObsCoreTableManager | None: 

1331 # Docstring inherited from lsst.daf.butler.registry.Registry 

1332 return self._managers.obscore 

1333 

1334 storageClasses: StorageClassFactory 

1335 """All storage classes known to the registry (`StorageClassFactory`). 

1336 """