Coverage for python/lsst/daf/butler/registries/sql.py: 12%

508 statements  

coverage.py v6.5.0, created at 2023-02-05 02:04 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("SqlRegistry",) 

25 

26import contextlib 

27import logging 

28import warnings 

29from typing import ( 

30 TYPE_CHECKING, 

31 Any, 

32 Dict, 

33 Iterable, 

34 Iterator, 

35 List, 

36 Literal, 

37 Mapping, 

38 Optional, 

39 Sequence, 

40 Set, 

41 Union, 

42 cast, 

43) 

44 

45import sqlalchemy 

46from lsst.daf.relation import LeafRelation, Relation 

47from lsst.resources import ResourcePathExpression 

48from lsst.utils.iteration import ensure_iterable 

49 

50from ..core import ( 

51 Config, 

52 DataCoordinate, 

53 DataId, 

54 DatasetAssociation, 

55 DatasetColumnTag, 

56 DatasetId, 

57 DatasetRef, 

58 DatasetType, 

59 Dimension, 

60 DimensionConfig, 

61 DimensionElement, 

62 DimensionGraph, 

63 DimensionRecord, 

64 DimensionUniverse, 

65 NamedKeyMapping, 

66 NameLookupMapping, 

67 Progress, 

68 StorageClassFactory, 

69 Timespan, 

70 ddl, 

71) 

72from ..core.utils import transactional 

73from ..registry import ( 

74 ArgumentError, 

75 CollectionExpressionError, 

76 CollectionSummary, 

77 CollectionType, 

78 CollectionTypeError, 

79 ConflictingDefinitionError, 

80 DataIdValueError, 

81 DatasetTypeError, 

82 DimensionNameError, 

83 InconsistentDataIdError, 

84 NoDefaultCollectionError, 

85 OrphanedRecordError, 

86 Registry, 

87 RegistryConfig, 

88 RegistryDefaults, 

89 queries, 

90) 

91from ..registry.interfaces import ChainedCollectionRecord, DatasetIdFactory, DatasetIdGenEnum, RunRecord 

92from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes 

93from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard 

94 

95if TYPE_CHECKING:

96 from .._butlerConfig import ButlerConfig 

97 from ..registry.interfaces import CollectionRecord, Database, DatastoreRegistryBridgeManager 

98 

99 

100_LOG = logging.getLogger(__name__) 

101 

102 

103class SqlRegistry(Registry): 

104 """Registry implementation based on SQLAlchemy. 

105 

106 Parameters 

107 ---------- 

108 database : `Database` 

109 Database instance to store Registry. 

110 defaults : `RegistryDefaults` 

111 Default collection search path and/or output `~CollectionType.RUN` 

112 collection. 

113 managers : `RegistryManagerInstances` 

114 All the managers required for this registry. 

115 """ 

116 

117 defaultConfigFile: Optional[str] = None 

118 """Path to configuration defaults. Accessed within the ``configs`` resource 

119 or relative to a search path. Can be `None` if no defaults are specified.

120 """ 

121 

122 @classmethod 

123 def createFromConfig( 

124 cls, 

125 config: Optional[Union[RegistryConfig, str]] = None, 

126 dimensionConfig: Optional[Union[DimensionConfig, str]] = None, 

127 butlerRoot: Optional[ResourcePathExpression] = None, 

128 ) -> Registry: 

129 """Create registry database and return `SqlRegistry` instance. 

130 

131 This method initializes database contents; the database must be empty

132 prior to calling this method.

133 

134 Parameters 

135 ---------- 

136 config : `RegistryConfig` or `str`, optional 

137 Registry configuration; if missing, the default configuration will

138 be loaded from ``registry.yaml``.

139 dimensionConfig : `DimensionConfig` or `str`, optional 

140 Dimensions configuration; if missing, the default configuration

141 will be loaded from ``dimensions.yaml``.

142 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

143 Path to the repository root this `SqlRegistry` will manage. 

144 

145 Returns 

146 ------- 

147 registry : `SqlRegistry` 

148 A new `SqlRegistry` instance. 
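
Examples
--------
A minimal sketch of bootstrapping a new, empty repository database; the
configuration file names and the repository root below are hypothetical::

    from lsst.daf.butler.registry import RegistryConfig

    config = RegistryConfig("registry.yaml")
    registry = SqlRegistry.createFromConfig(
        config, dimensionConfig="dimensions.yaml", butlerRoot="/repo/example"
    )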

149 """ 

150 config = cls.forceRegistryConfig(config) 

151 config.replaceRoot(butlerRoot) 

152 

153 if isinstance(dimensionConfig, str): 

154 dimensionConfig = DimensionConfig(dimensionConfig) 

155 elif dimensionConfig is None: 

156 dimensionConfig = DimensionConfig() 

157 elif not isinstance(dimensionConfig, DimensionConfig): 

158 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

159 

160 DatabaseClass = config.getDatabaseClass() 

161 database = DatabaseClass.fromUri( 

162 str(config.connectionString), origin=config.get("origin", 0), namespace=config.get("namespace") 

163 ) 

164 managerTypes = RegistryManagerTypes.fromConfig(config) 

165 managers = managerTypes.makeRepo(database, dimensionConfig) 

166 return cls(database, RegistryDefaults(), managers) 

167 

168 @classmethod 

169 def fromConfig( 

170 cls, 

171 config: Union[ButlerConfig, RegistryConfig, Config, str], 

172 butlerRoot: Optional[ResourcePathExpression] = None, 

173 writeable: bool = True, 

174 defaults: Optional[RegistryDefaults] = None, 

175 ) -> Registry: 

176 """Create `Registry` subclass instance from `config`. 

177 

178 Registry database must be initialized prior to calling this method. 

179 

180 Parameters 

181 ---------- 

182 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

183 Registry configuration.

184 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

185 Path to the repository root this `Registry` will manage. 

186 writeable : `bool`, optional 

187 If `True` (default) create a read-write connection to the database. 

188 defaults : `RegistryDefaults`, optional 

189 Default collection search path and/or output `~CollectionType.RUN` 

190 collection. 

191 

192 Returns 

193 ------- 

194 registry : `SqlRegistry` (subclass) 

195 A new `SqlRegistry` subclass instance. 
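
Examples
--------
A minimal sketch of opening an existing repository read-only, with default
collections supplied up front; the configuration path and collection name
are hypothetical::

    from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults

    config = RegistryConfig("/repo/example/registry.yaml")
    registry = SqlRegistry.fromConfig(
        config,
        writeable=False,
        defaults=RegistryDefaults(collections=["HSC/defaults"]),
    )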

196 """ 

197 config = cls.forceRegistryConfig(config) 

198 config.replaceRoot(butlerRoot) 

199 DatabaseClass = config.getDatabaseClass() 

200 database = DatabaseClass.fromUri( 

201 str(config.connectionString), 

202 origin=config.get("origin", 0), 

203 namespace=config.get("namespace"), 

204 writeable=writeable, 

205 ) 

206 managerTypes = RegistryManagerTypes.fromConfig(config) 

207 with database.session(): 

208 managers = managerTypes.loadRepo(database) 

209 if defaults is None: 

210 defaults = RegistryDefaults() 

211 return cls(database, defaults, managers) 

212 

213 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances): 

214 self._db = database 

215 self._managers = managers 

216 self.storageClasses = StorageClassFactory() 

217 # Intentionally invoke property setter to initialize defaults. This 

218 # can only be done after most of the rest of Registry has already been 

219 # initialized, and must be done before the property getter is used. 

220 self.defaults = defaults 

221 # In the future DatasetIdFactory may become configurable and this 

222 # instance will need to be shared with datasets manager. 

223 self.datasetIdFactory = DatasetIdFactory() 

224 

225 def __str__(self) -> str: 

226 return str(self._db) 

227 

228 def __repr__(self) -> str: 

229 return f"SqlRegistry({self._db!r}, {self.dimensions!r})" 

230 

231 def isWriteable(self) -> bool: 

232 # Docstring inherited from lsst.daf.butler.registry.Registry 

233 return self._db.isWriteable() 

234 

235 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry: 

236 # Docstring inherited from lsst.daf.butler.registry.Registry 

237 if defaults is None: 

238 # No need to copy, because `RegistryDefaults` is immutable; we 

239 # effectively copy on write. 

240 defaults = self.defaults 

241 return type(self)(self._db, defaults, self._managers) 

242 

243 @property 

244 def dimensions(self) -> DimensionUniverse: 

245 # Docstring inherited from lsst.daf.butler.registry.Registry 

246 return self._managers.dimensions.universe 

247 

248 def refresh(self) -> None: 

249 # Docstring inherited from lsst.daf.butler.registry.Registry 

250 with self._db.transaction(): 

251 self._managers.refresh() 

252 

253 @contextlib.contextmanager 

254 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

255 # Docstring inherited from lsst.daf.butler.registry.Registry 

256 try: 

257 with self._db.transaction(savepoint=savepoint): 

258 yield 

259 except BaseException: 

260 # TODO: this clears the caches sometimes when we wouldn't actually 

261 # need to. Can we avoid that? 

262 self._managers.dimensions.clearCaches() 

263 raise 

264 
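    # Illustrative usage sketch: grouping several registry writes in one
    # transaction so that a failure rolls all of them back.  The collection
    # name and dimension record below are hypothetical.
    #
    #     with registry.transaction(savepoint=True):
    #         registry.registerRun("u/someone/example-run")
    #         registry.insertDimensionData("instrument", {"name": "DummyCam"})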

265 def resetConnectionPool(self) -> None: 

266 """Reset SQLAlchemy connection pool for `SqlRegistry` database. 

267 

268 This operation is useful when using the registry with fork-based

269 multiprocessing. To use the registry across a fork boundary, ensure

270 that there are no currently active connections (no session or

271 transaction is in progress) and reset the connection pool with this

272 method. The child process should call this method immediately

273 after the fork.

274 """ 

275 self._db._engine.dispose() 

276 
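    # Illustrative sketch of using the registry with fork-based
    # multiprocessing; the worker function below is hypothetical.
    #
    #     import multiprocessing
    #
    #     def _worker(registry: SqlRegistry) -> None:
    #         registry.resetConnectionPool()  # drop inherited connections first
    #         ...  # safe to use the registry in the child from here on
    #
    #     ctx = multiprocessing.get_context("fork")
    #     proc = ctx.Process(target=_worker, args=(registry,))
    #     proc.start()
    #     proc.join()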

277 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

278 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

279 other data repository client. 

280 

281 Opaque table records can be added via `insertOpaqueData`, retrieved via 

282 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

283 

284 Parameters 

285 ---------- 

286 tableName : `str` 

287 Logical name of the opaque table. This may differ from the 

288 actual name used in the database by a prefix and/or suffix. 

289 spec : `ddl.TableSpec` 

290 Specification for the table to be added. 

291 """ 

292 self._managers.opaque.register(tableName, spec) 

293 

294 @transactional 

295 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

296 """Insert records into an opaque table. 

297 

298 Parameters 

299 ---------- 

300 tableName : `str` 

301 Logical name of the opaque table. Must match the name used in a 

302 previous call to `registerOpaqueTable`. 

303 data 

304 Each additional positional argument is a dictionary that represents 

305 a single row to be added. 

306 """ 

307 self._managers.opaque[tableName].insert(*data) 

308 

309 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]: 

310 """Retrieve records from an opaque table. 

311 

312 Parameters 

313 ---------- 

314 tableName : `str` 

315 Logical name of the opaque table. Must match the name used in a 

316 previous call to `registerOpaqueTable`. 

317 where 

318 Additional keyword arguments are interpreted as equality 

319 constraints that restrict the returned rows (combined with AND); 

320 keyword arguments are column names and values are the values they 

321 must have. 

322 

323 Yields 

324 ------ 

325 row : `dict` 

326 A dictionary representing a single result row. 

327 """ 

328 yield from self._managers.opaque[tableName].fetch(**where) 

329 

330 @transactional 

331 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

332 """Remove records from an opaque table. 

333 

334 Parameters 

335 ---------- 

336 tableName : `str` 

337 Logical name of the opaque table. Must match the name used in a 

338 previous call to `registerOpaqueTable`. 

339 where 

340 Additional keyword arguments are interpreted as equality 

341 constraints that restrict the deleted rows (combined with AND); 

342 keyword arguments are column names and values are the values they 

343 must have. 

344 """ 

345 self._managers.opaque[tableName].delete(where.keys(), where) 

346 
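    # Illustrative sketch of the opaque-table round trip provided by the
    # methods above; ``spec`` is assumed to be a pre-built `ddl.TableSpec`,
    # and the table name, columns, and values are hypothetical.
    #
    #     registry.registerOpaqueTable("datastore_records", spec)
    #     registry.insertOpaqueData(
    #         "datastore_records", {"dataset_id": dataset_id, "path": "a/b.fits"}
    #     )
    #     rows = list(registry.fetchOpaqueData("datastore_records", path="a/b.fits"))
    #     registry.deleteOpaqueData("datastore_records", dataset_id=dataset_id)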

347 def registerCollection( 

348 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None 

349 ) -> bool: 

350 # Docstring inherited from lsst.daf.butler.registry.Registry 

351 _, registered = self._managers.collections.register(name, type, doc=doc) 

352 return registered 

353 

354 def getCollectionType(self, name: str) -> CollectionType: 

355 # Docstring inherited from lsst.daf.butler.registry.Registry 

356 return self._managers.collections.find(name).type 

357 

358 def _get_collection_record(self, name: str) -> CollectionRecord: 

359 # Docstring inherited from lsst.daf.butler.registry.Registry 

360 return self._managers.collections.find(name) 

361 

362 def registerRun(self, name: str, doc: Optional[str] = None) -> bool: 

363 # Docstring inherited from lsst.daf.butler.registry.Registry 

364 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

365 return registered 

366 

367 @transactional 

368 def removeCollection(self, name: str) -> None: 

369 # Docstring inherited from lsst.daf.butler.registry.Registry 

370 self._managers.collections.remove(name) 

371 

372 def getCollectionChain(self, parent: str) -> tuple[str, ...]: 

373 # Docstring inherited from lsst.daf.butler.registry.Registry 

374 record = self._managers.collections.find(parent) 

375 if record.type is not CollectionType.CHAINED: 

376 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

377 assert isinstance(record, ChainedCollectionRecord) 

378 return record.children 

379 

380 @transactional 

381 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

382 # Docstring inherited from lsst.daf.butler.registry.Registry 

383 record = self._managers.collections.find(parent) 

384 if record.type is not CollectionType.CHAINED: 

385 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

386 assert isinstance(record, ChainedCollectionRecord) 

387 children = CollectionWildcard.from_expression(children).require_ordered() 

388 if children != record.children or flatten: 

389 record.update(self._managers.collections, children, flatten=flatten) 

390 
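    # Illustrative sketch of composing a CHAINED collection from existing
    # collections; all collection names below are hypothetical.
    #
    #     registry.registerRun("HSC/runs/example")
    #     registry.registerCollection("HSC/calib", CollectionType.CALIBRATION)
    #     registry.registerCollection("HSC/defaults", CollectionType.CHAINED)
    #     registry.setCollectionChain("HSC/defaults", ["HSC/runs/example", "HSC/calib"])
    #     registry.getCollectionChain("HSC/defaults")
    #     # -> ("HSC/runs/example", "HSC/calib")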

391 def getCollectionParentChains(self, collection: str) -> Set[str]: 

392 # Docstring inherited from lsst.daf.butler.registry.Registry 

393 return { 

394 record.name 

395 for record in self._managers.collections.getParentChains( 

396 self._managers.collections.find(collection).key 

397 ) 

398 } 

399 

400 def getCollectionDocumentation(self, collection: str) -> Optional[str]: 

401 # Docstring inherited from lsst.daf.butler.registry.Registry 

402 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

403 

404 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None: 

405 # Docstring inherited from lsst.daf.butler.registry.Registry 

406 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

407 

408 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

409 # Docstring inherited from lsst.daf.butler.registry.Registry 

410 record = self._managers.collections.find(collection) 

411 return self._managers.datasets.getCollectionSummary(record) 

412 

413 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

414 # Docstring inherited from lsst.daf.butler.registry.Registry 

415 _, inserted = self._managers.datasets.register(datasetType) 

416 return inserted 

417 

418 def removeDatasetType(self, name: str) -> None: 

419 # Docstring inherited from lsst.daf.butler.registry.Registry 

420 self._managers.datasets.remove(name) 

421 

422 def getDatasetType(self, name: str) -> DatasetType: 

423 # Docstring inherited from lsst.daf.butler.registry.Registry 

424 parent_name, component = DatasetType.splitDatasetTypeName(name) 

425 storage = self._managers.datasets[parent_name] 

426 if component is None: 

427 return storage.datasetType 

428 else: 

429 return storage.datasetType.makeComponentDatasetType(component) 

430 

431 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

432 # Docstring inherited from lsst.daf.butler.registry.Registry 

433 return self._managers.datasets.supportsIdGenerationMode(mode) 

434 

435 def findDataset( 

436 self, 

437 datasetType: Union[DatasetType, str], 

438 dataId: Optional[DataId] = None, 

439 *, 

440 collections: Any = None, 

441 timespan: Optional[Timespan] = None, 

442 **kwargs: Any, 

443 ) -> Optional[DatasetRef]: 

444 # Docstring inherited from lsst.daf.butler.registry.Registry 

445 if collections is None: 

446 if not self.defaults.collections: 

447 raise NoDefaultCollectionError( 

448 "No collections provided to findDataset, and no defaults from registry construction." 

449 ) 

450 collections = self.defaults.collections 

451 backend = queries.SqlQueryBackend(self._db, self._managers) 

452 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True) 

453 matched_collections = backend.resolve_collection_wildcard(collection_wildcard) 

454 parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard( 

455 datasetType, components_deprecated=False 

456 ) 

457 if len(components) > 1: 

458 raise DatasetTypeError( 

459 f"findDataset requires exactly one dataset type; got multiple components {components} " 

460 f"for parent dataset type {parent_dataset_type.name}." 

461 ) 

462 component = components[0] 

463 dataId = DataCoordinate.standardize( 

464 dataId, 

465 graph=parent_dataset_type.dimensions, 

466 universe=self.dimensions, 

467 defaults=self.defaults.dataId, 

468 **kwargs, 

469 ) 

470 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names} 

471 (filtered_collections,) = backend.filter_dataset_collections( 

472 [parent_dataset_type], 

473 matched_collections, 

474 governor_constraints=governor_constraints, 

475 ).values() 

476 if not filtered_collections: 

477 return None 

478 tail_collections: list[CollectionRecord] = [] 

479 if timespan is None: 

480 for n, collection_record in enumerate(filtered_collections): 

481 if collection_record.type is CollectionType.CALIBRATION: 

482 tail_collections.extend(filtered_collections[n:]) 

483 del filtered_collections[n:] 

484 break 

485 if filtered_collections: 

486 requested_columns = {"dataset_id", "run", "collection"} 

487 with backend.context() as context: 

488 predicate = context.make_data_coordinate_predicate( 

489 dataId.subset(parent_dataset_type.dimensions), full=False 

490 ) 

491 if timespan is not None: 

492 requested_columns.add("timespan") 

493 predicate = predicate.logical_and( 

494 context.make_timespan_overlap_predicate( 

495 DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan 

496 ) 

497 ) 

498 relation = backend.make_dataset_query_relation( 

499 parent_dataset_type, filtered_collections, requested_columns, context 

500 ).with_rows_satisfying(predicate) 

501 rows = list(context.fetch_iterable(relation)) 

502 else: 

503 rows = [] 

504 if not rows: 

505 if tail_collections: 

506 msg = ( 

507 f"Cannot search for dataset '{parent_dataset_type.name}' in CALIBRATION collection " 

508 f"{tail_collections[0].name} without an input timespan." 

509 ) 

510 if len(tail_collections) > 1: 

511 remainder_names = ", ".join(c.name for c in tail_collections[1:])

512 msg += f" This also blocks searching collections [{remainder_names}] that follow it." 

513 raise TypeError(msg) 

514 return None 

515 elif len(rows) == 1: 

516 best_row = rows[0] 

517 else: 

518 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)} 

519 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

520 row_iter = iter(rows) 

521 best_row = next(row_iter) 

522 best_rank = rank_by_collection_key[best_row[collection_tag]] 

523 have_tie = False 

524 for row in row_iter: 

525 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank: 

526 best_row = row 

527 best_rank = rank 

528 have_tie = False 

529 elif rank == best_rank: 

530 have_tie = True 

531 assert timespan is not None, "Rank ties should be impossible given DB constraints." 

532 if have_tie: 

533 raise LookupError( 

534 f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections " 

535 f"{collection_wildcard.strings} with timespan {timespan}." 

536 ) 

537 reader = queries.DatasetRefReader( 

538 parent_dataset_type, 

539 translate_collection=lambda k: self._managers.collections[k].name, 

540 ) 

541 ref = reader.read(best_row, data_id=dataId) 

542 if component is not None: 

543 ref = ref.makeComponentRef(component) 

544 return ref 

545 
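    # Illustrative sketch of a find-first dataset lookup, including a
    # calibration lookup constrained by a timespan; the dataset type, data
    # ID values, collection name, and ``t_start``/``t_end`` are hypothetical.
    #
    #     ref = registry.findDataset(
    #         "bias",
    #         instrument="DummyCam",
    #         detector=0,
    #         collections=["DummyCam/calib"],
    #         timespan=Timespan(begin=t_start, end=t_end),
    #     )
    #     if ref is None:
    #         ...  # no matching dataset in the given collections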

546 @transactional 

547 def insertDatasets( 

548 self, 

549 datasetType: Union[DatasetType, str], 

550 dataIds: Iterable[DataId], 

551 run: Optional[str] = None, 

552 expand: bool = True, 

553 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

554 ) -> List[DatasetRef]: 

555 # Docstring inherited from lsst.daf.butler.registry.Registry 

556 if isinstance(datasetType, DatasetType): 

557 storage = self._managers.datasets.find(datasetType.name) 

558 if storage is None: 

559 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

560 else: 

561 storage = self._managers.datasets.find(datasetType) 

562 if storage is None: 

563 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.") 

564 if run is None: 

565 if self.defaults.run is None: 

566 raise NoDefaultCollectionError( 

567 "No run provided to insertDatasets, and no default from registry construction." 

568 ) 

569 run = self.defaults.run 

570 runRecord = self._managers.collections.find(run) 

571 if runRecord.type is not CollectionType.RUN: 

572 raise CollectionTypeError( 

573 f"Given collection is of type {runRecord.type.name}; RUN collection required." 

574 ) 

575 assert isinstance(runRecord, RunRecord) 

576 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

577 if expand: 

578 expandedDataIds = [ 

579 self.expandDataId(dataId, graph=storage.datasetType.dimensions) 

580 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") 

581 ] 

582 else: 

583 expandedDataIds = [ 

584 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds 

585 ] 

586 try: 

587 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode)) 

588 if self._managers.obscore: 

589 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

590 self._managers.obscore.add_datasets(refs, context) 

591 except sqlalchemy.exc.IntegrityError as err: 

592 raise ConflictingDefinitionError( 

593 "A database constraint failure was triggered by inserting " 

594 f"one or more datasets of type {storage.datasetType} into " 

595 f"collection '{run}'. " 

596 "This probably means a dataset with the same data ID " 

597 "and dataset type already exists, but it may also mean a " 

598 "dimension row is missing." 

599 ) from err 

600 return refs 

601 
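    # Illustrative sketch of registering a dataset type and inserting new
    # datasets into a RUN collection; all names, dimensions, and data ID
    # values below are hypothetical.
    #
    #     datasetType = DatasetType(
    #         "rawexp",
    #         dimensions=["instrument", "detector", "exposure"],
    #         storageClass="Exposure",
    #         universe=registry.dimensions,
    #     )
    #     registry.registerDatasetType(datasetType)
    #     registry.registerRun("DummyCam/raw/all")
    #     (ref,) = registry.insertDatasets(
    #         datasetType,
    #         [{"instrument": "DummyCam", "detector": 0, "exposure": 42}],
    #         run="DummyCam/raw/all",
    #     )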

602 @transactional 

603 def _importDatasets( 

604 self, 

605 datasets: Iterable[DatasetRef], 

606 expand: bool = True, 

607 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

608 reuseIds: bool = False, 

609 ) -> List[DatasetRef]: 

610 # Docstring inherited from lsst.daf.butler.registry.Registry 

611 datasets = list(datasets) 

612 if not datasets: 

613 # nothing to do 

614 return [] 

615 

616 # find dataset type 

617 datasetTypes = set(dataset.datasetType for dataset in datasets) 

618 if len(datasetTypes) != 1: 

619 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}") 

620 datasetType = datasetTypes.pop() 

621 

622 # get storage handler for this dataset type 

623 storage = self._managers.datasets.find(datasetType.name) 

624 if storage is None: 

625 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

626 

627 # find run name 

628 runs = set(dataset.run for dataset in datasets) 

629 if len(runs) != 1: 

630 raise ValueError(f"Multiple run names in input datasets: {runs}") 

631 run = runs.pop() 

632 if run is None: 

633 if self.defaults.run is None: 

634 raise NoDefaultCollectionError( 

635 "No run provided to ingestDatasets, and no default from registry construction." 

636 ) 

637 run = self.defaults.run 

638 

639 runRecord = self._managers.collections.find(run) 

640 if runRecord.type is not CollectionType.RUN: 

641 raise CollectionTypeError( 

642 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};" 

643 " RUN collection required." 

644 ) 

645 assert isinstance(runRecord, RunRecord) 

646 

647 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

648 if expand: 

649 expandedDatasets = [ 

650 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions)) 

651 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") 

652 ] 

653 else: 

654 expandedDatasets = [ 

655 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True) 

656 for dataset in datasets 

657 ] 

658 

659 try: 

660 refs = list(storage.import_(runRecord, expandedDatasets, idGenerationMode, reuseIds)) 

661 if self._managers.obscore: 

662 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

663 self._managers.obscore.add_datasets(refs, context) 

664 except sqlalchemy.exc.IntegrityError as err: 

665 raise ConflictingDefinitionError( 

666 "A database constraint failure was triggered by inserting " 

667 f"one or more datasets of type {storage.datasetType} into " 

668 f"collection '{run}'. " 

669 "This probably means a dataset with the same data ID " 

670 "and dataset type already exists, but it may also mean a " 

671 "dimension row is missing." 

672 ) from err 

673 return refs 

674 

675 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]: 

676 # Docstring inherited from lsst.daf.butler.registry.Registry 

677 return self._managers.datasets.getDatasetRef(id) 

678 

679 @transactional 

680 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

681 # Docstring inherited from lsst.daf.butler.registry.Registry 

682 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

683 for datasetType, refsForType in progress.iter_item_chunks( 

684 DatasetRef.groupByType(refs).items(), desc="Removing datasets by type" 

685 ): 

686 storage = self._managers.datasets[datasetType.name] 

687 try: 

688 storage.delete(refsForType) 

689 except sqlalchemy.exc.IntegrityError as err: 

690 raise OrphanedRecordError( 

691 "One or more datasets is still present in one or more Datastores." 

692 ) from err 

693 

694 @transactional 

695 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

696 # Docstring inherited from lsst.daf.butler.registry.Registry 

697 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

698 collectionRecord = self._managers.collections.find(collection) 

699 if collectionRecord.type is not CollectionType.TAGGED: 

700 raise CollectionTypeError( 

701 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED." 

702 ) 

703 for datasetType, refsForType in progress.iter_item_chunks( 

704 DatasetRef.groupByType(refs).items(), desc="Associating datasets by type" 

705 ): 

706 storage = self._managers.datasets[datasetType.name] 

707 try: 

708 storage.associate(collectionRecord, refsForType) 

709 if self._managers.obscore: 

710 # If a TAGGED collection is being monitored by ObsCore 

711 # manager then we may need to save the dataset. 

712 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

713 self._managers.obscore.associate(refsForType, collectionRecord, context) 

714 except sqlalchemy.exc.IntegrityError as err: 

715 raise ConflictingDefinitionError( 

716 f"Constraint violation while associating dataset of type {datasetType.name} with " 

717 f"collection {collection}. This probably means that one or more datasets with the same " 

718 "dataset type and data ID already exist in the collection, but it may also indicate " 

719 "that the datasets do not exist." 

720 ) from err 

721 

722 @transactional 

723 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

724 # Docstring inherited from lsst.daf.butler.registry.Registry 

725 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

726 collectionRecord = self._managers.collections.find(collection) 

727 if collectionRecord.type is not CollectionType.TAGGED: 

728 raise CollectionTypeError( 

729 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED." 

730 ) 

731 for datasetType, refsForType in progress.iter_item_chunks( 

732 DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type" 

733 ): 

734 storage = self._managers.datasets[datasetType.name] 

735 storage.disassociate(collectionRecord, refsForType) 

736 if self._managers.obscore: 

737 self._managers.obscore.disassociate(refsForType, collectionRecord) 

738 

739 @transactional 

740 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

741 # Docstring inherited from lsst.daf.butler.registry.Registry 

742 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

743 collectionRecord = self._managers.collections.find(collection) 

744 for datasetType, refsForType in progress.iter_item_chunks( 

745 DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type" 

746 ): 

747 storage = self._managers.datasets[datasetType.name] 

748 storage.certify( 

749 collectionRecord, 

750 refsForType, 

751 timespan, 

752 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

753 ) 

754 

755 @transactional 

756 def decertify( 

757 self, 

758 collection: str, 

759 datasetType: Union[str, DatasetType], 

760 timespan: Timespan, 

761 *, 

762 dataIds: Optional[Iterable[DataId]] = None, 

763 ) -> None: 

764 # Docstring inherited from lsst.daf.butler.registry.Registry 

765 collectionRecord = self._managers.collections.find(collection) 

766 if isinstance(datasetType, str): 

767 storage = self._managers.datasets[datasetType] 

768 else: 

769 storage = self._managers.datasets[datasetType.name] 

770 standardizedDataIds = None 

771 if dataIds is not None: 

772 standardizedDataIds = [ 

773 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds 

774 ] 

775 storage.decertify( 

776 collectionRecord, 

777 timespan, 

778 dataIds=standardizedDataIds, 

779 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

780 ) 

781 

782 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

783 """Return an object that allows a new `Datastore` instance to 

784 communicate with this `Registry`. 

785 

786 Returns 

787 ------- 

788 manager : `DatastoreRegistryBridgeManager` 

789 Object that mediates communication between this `Registry` and its 

790 associated datastores. 

791 """ 

792 return self._managers.datastores 

793 

794 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

795 # Docstring inherited from lsst.daf.butler.registry.Registry 

796 return self._managers.datastores.findDatastores(ref) 

797 

798 def expandDataId( 

799 self, 

800 dataId: Optional[DataId] = None, 

801 *, 

802 graph: Optional[DimensionGraph] = None, 

803 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None, 

804 withDefaults: bool = True, 

805 **kwargs: Any, 

806 ) -> DataCoordinate: 

807 # Docstring inherited from lsst.daf.butler.registry.Registry 

808 if not withDefaults: 

809 defaults = None 

810 else: 

811 defaults = self.defaults.dataId 

812 try: 

813 standardized = DataCoordinate.standardize( 

814 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs 

815 ) 

816 except KeyError as exc: 

817 # This means either kwargs have some odd name or required 

818 # dimension is missing. 

819 raise DimensionNameError(str(exc)) from exc 

820 if standardized.hasRecords(): 

821 return standardized 

822 if records is None: 

823 records = {} 

824 elif isinstance(records, NamedKeyMapping): 

825 records = records.byName() 

826 else: 

827 records = dict(records) 

828 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

829 records.update(dataId.records.byName()) 

830 keys = standardized.byName() 

831 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

832 for element in standardized.graph.primaryKeyTraversalOrder: 

833 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

834 if record is ...: 

835 if isinstance(element, Dimension) and keys.get(element.name) is None: 

836 if element in standardized.graph.required: 

837 raise DimensionNameError( 

838 f"No value or null value for required dimension {element.name}." 

839 ) 

840 keys[element.name] = None 

841 record = None 

842 else: 

843 storage = self._managers.dimensions[element] 

844 record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context) 

845 records[element.name] = record 

846 if record is not None: 

847 for d in element.implied: 

848 value = getattr(record, d.name) 

849 if keys.setdefault(d.name, value) != value: 

850 raise InconsistentDataIdError( 

851 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

852 f"but {element.name} implies {d.name}={value!r}." 

853 ) 

854 else: 

855 if element in standardized.graph.required: 

856 raise DataIdValueError( 

857 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

858 ) 

859 if element.alwaysJoin: 

860 raise InconsistentDataIdError( 

861 f"Could not fetch record for element {element.name} via keys {keys}, "

862 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

863 "related.", 

864 ) 

865 for d in element.implied: 

866 keys.setdefault(d.name, None) 

867 records.setdefault(d.name, None) 

868 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records) 

869 
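    # Illustrative sketch of expanding a minimal data ID so that implied
    # dimension values and dimension records are attached; the dimension
    # values are hypothetical.
    #
    #     data_id = registry.expandDataId(instrument="DummyCam", exposure=42)
    #     data_id.hasRecords()         # -> True
    #     data_id.records["exposure"]  # full DimensionRecord for the exposure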

870 def insertDimensionData( 

871 self, 

872 element: Union[DimensionElement, str], 

873 *data: Union[Mapping[str, Any], DimensionRecord], 

874 conform: bool = True, 

875 replace: bool = False, 

876 skip_existing: bool = False, 

877 ) -> None: 

878 # Docstring inherited from lsst.daf.butler.registry.Registry 

879 if conform: 

880 if isinstance(element, str): 

881 element = self.dimensions[element] 

882 records = [ 

883 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data 

884 ] 

885 else: 

886 # Ignore typing since caller said to trust them with conform=False. 

887 records = data # type: ignore 

888 storage = self._managers.dimensions[element] 

889 storage.insert(*records, replace=replace, skip_existing=skip_existing) 

890 

891 def syncDimensionData( 

892 self, 

893 element: Union[DimensionElement, str], 

894 row: Union[Mapping[str, Any], DimensionRecord], 

895 conform: bool = True, 

896 update: bool = False, 

897 ) -> Union[bool, Dict[str, Any]]: 

898 # Docstring inherited from lsst.daf.butler.registry.Registry 

899 if conform: 

900 if isinstance(element, str): 

901 element = self.dimensions[element] 

902 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

903 else: 

904 # Ignore typing since caller said to trust them with conform=False. 

905 record = row # type: ignore 

906 storage = self._managers.dimensions[element] 

907 return storage.sync(record, update=update) 

908 
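    # Illustrative sketch of adding dimension metadata; the required record
    # fields depend on the dimension universe, so the values below are
    # hypothetical.
    #
    #     registry.insertDimensionData("instrument", {"name": "DummyCam"})
    #     registry.syncDimensionData(
    #         "detector",
    #         {"instrument": "DummyCam", "id": 0, "full_name": "det0"},
    #         update=True,
    #     )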

909 def queryDatasetTypes( 

910 self, 

911 expression: Any = ..., 

912 *, 

913 components: Optional[bool] = None, 

914 missing: Optional[List[str]] = None, 

915 ) -> Iterable[DatasetType]: 

916 # Docstring inherited from lsst.daf.butler.registry.Registry 

917 wildcard = DatasetTypeWildcard.from_expression(expression) 

918 composition_dict = self._managers.datasets.resolve_wildcard( 

919 wildcard, 

920 components=components, 

921 missing=missing, 

922 ) 

923 result: list[DatasetType] = [] 

924 for parent_dataset_type, components_for_parent in composition_dict.items(): 

925 result.extend( 

926 parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type 

927 for c in components_for_parent 

928 ) 

929 return result 

930 

931 def queryCollections( 

932 self, 

933 expression: Any = ..., 

934 datasetType: Optional[DatasetType] = None, 

935 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(), 

936 flattenChains: bool = False, 

937 includeChains: Optional[bool] = None, 

938 ) -> Sequence[str]: 

939 # Docstring inherited from lsst.daf.butler.registry.Registry 

940 

941 # Right now the datasetType argument is completely ignored, but that

942 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

943 # ticket will take care of that. 

944 try: 

945 wildcard = CollectionWildcard.from_expression(expression) 

946 except TypeError as exc: 

947 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc 

948 collectionTypes = ensure_iterable(collectionTypes) 

949 return [ 

950 record.name 

951 for record in self._managers.collections.resolve_wildcard( 

952 wildcard, 

953 collection_types=frozenset(collectionTypes), 

954 flatten_chains=flattenChains, 

955 include_chains=includeChains, 

956 ) 

957 ] 

958 

959 def _makeQueryBuilder( 

960 self, 

961 summary: queries.QuerySummary, 

962 doomed_by: Iterable[str] = (), 

963 ) -> queries.QueryBuilder: 

964 """Return a `QueryBuilder` instance capable of constructing and 

965 managing more complex queries than those obtainable via `Registry` 

966 interfaces. 

967 

968 This is an advanced interface; downstream code should prefer 

969 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

970 are sufficient. 

971 

972 Parameters 

973 ---------- 

974 summary : `queries.QuerySummary` 

975 Object describing and categorizing the full set of dimensions that 

976 will be included in the query. 

977 doomed_by : `Iterable` of `str`, optional 

978 A list of diagnostic messages that indicate why the query is going 

979 to yield no results and should not even be executed. If an empty 

980 container (default), the query will be executed unless other code

981 determines that it is doomed. 

982 

983 Returns 

984 ------- 

985 builder : `queries.QueryBuilder` 

986 Object that can be used to construct and perform advanced queries. 

987 """ 

988 doomed_by = list(doomed_by) 

989 backend = queries.SqlQueryBackend(self._db, self._managers) 

990 context = backend.context() 

991 relation: Relation | None = None 

992 if doomed_by: 

993 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by) 

994 return queries.QueryBuilder( 

995 summary, 

996 backend=backend, 

997 context=context, 

998 relation=relation, 

999 ) 

1000 

1001 def _standardize_query_data_id_args( 

1002 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any 

1003 ) -> DataCoordinate: 

1004 """Preprocess the data ID arguments passed to query* methods. 

1005 

1006 Parameters 

1007 ---------- 

1008 data_id : `DataId` or `None` 

1009 Data ID that constrains the query results. 

1010 doomed_by : `list` [ `str` ] 

1011 List to append messages indicating why the query is doomed to 

1012 yield no results. 

1013 **kwargs 

1014 Additional data ID key-value pairs, extending and overriding 

1015 ``data_id``. 

1016 

1017 Returns 

1018 ------- 

1019 data_id : `DataCoordinate` 

1020 Standardized data ID. Will be fully expanded unless expansion 

1021 fails, in which case a message will be appended to ``doomed_by`` 

1022 on return. 

1023 """ 

1024 try: 

1025 return self.expandDataId(data_id, **kwargs) 

1026 except DataIdValueError as err: 

1027 doomed_by.append(str(err)) 

1028 return DataCoordinate.standardize( 

1029 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId 

1030 ) 

1031 

1032 def _standardize_query_dataset_args( 

1033 self, 

1034 datasets: Any, 

1035 collections: Any, 

1036 components: bool | None, 

1037 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain", 

1038 *, 

1039 doomed_by: list[str], 

1040 ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]: 

1041 """Preprocess dataset arguments passed to query* methods. 

1042 

1043 Parameters 

1044 ---------- 

1045 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these 

1046 Expression identifying dataset types. See `queryDatasetTypes` for 

1047 details. 

1048 collections : `str`, `re.Pattern`, or iterable of these 

1049 Expression identifying collections to be searched. See 

1050 `queryCollections` for details. 

1051 components : `bool`, optional 

1052 If `True`, apply all expression patterns to component dataset type 

1053 names as well. If `False`, never apply patterns to components. 

1054 If `None` (default), apply patterns to components only if their 

1055 parent datasets were not matched by the expression. 

1056 Fully-specified component datasets (`str` or `DatasetType` 

1057 instances) are always included. 

1058 

1059 Values other than `False` are deprecated, and only `False` will be 

1060 supported after v26. After v27 this argument will be removed 

1061 entirely. 

1062 mode : `str`, optional 

1063 The way in which datasets are being used in this query; one of: 

1064 

1065 - "find_first": this is a query for the first dataset in an 

1066 ordered list of collections. Prohibits collection wildcards, 

1067 but permits dataset type wildcards. 

1068 

1069 - "find_all": this is a query for all datasets in all matched 

1070 collections. Permits collection and dataset type wildcards. 

1071 

1072 - "constrain": this is a query for something other than datasets, 

1073 with results constrained by dataset existence. Permits 

1074 collection wildcards and prohibits ``...`` as a dataset type 

1075 wildcard. 

1076 doomed_by : `list` [ `str` ] 

1077 List to append messages indicating why the query is doomed to 

1078 yield no results. 

1079 

1080 Returns 

1081 ------- 

1082 composition : `defaultdict` [ `DatasetType`, `list` [ `str` ] ] 

1083 Dictionary mapping parent dataset type to `list` of components 

1084 matched for that dataset type (or `None` for the parent itself). 

1085 collections : `CollectionWildcard` 

1086 Processed collection expression. 

1087 """ 

1088 composition: dict[DatasetType, list[str | None]] = {} 

1089 if datasets is not None: 

1090 if not collections: 

1091 if not self.defaults.collections: 

1092 raise NoDefaultCollectionError("No collections, and no registry default collections.") 

1093 collections = self.defaults.collections 

1094 else: 

1095 collections = CollectionWildcard.from_expression(collections) 

1096 if mode == "find_first" and collections.patterns: 

1097 raise TypeError( 

1098 f"Collection pattern(s) {collections.patterns} not allowed in this context." 

1099 ) 

1100 missing: list[str] = [] 

1101 composition = self._managers.datasets.resolve_wildcard( 

1102 datasets, components=components, missing=missing, explicit_only=(mode == "constrain") 

1103 ) 

1104 if missing and mode == "constrain": 

1105 # After v26 this should raise MissingDatasetTypeError, to be 

1106 # implemented on DM-36303. 

1107 warnings.warn( 

1108 f"Dataset type(s) {missing} are not registered; this will be an error after v26.", 

1109 FutureWarning, 

1110 ) 

1111 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing) 

1112 elif collections: 

1113 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") 

1114 return composition, collections 

1115 

1116 def queryDatasets( 

1117 self, 

1118 datasetType: Any, 

1119 *, 

1120 collections: Any = None, 

1121 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

1122 dataId: Optional[DataId] = None, 

1123 where: str = "", 

1124 findFirst: bool = False, 

1125 components: Optional[bool] = None, 

1126 bind: Optional[Mapping[str, Any]] = None, 

1127 check: bool = True, 

1128 **kwargs: Any, 

1129 ) -> queries.DatasetQueryResults: 

1130 # Docstring inherited from lsst.daf.butler.registry.Registry 

1131 doomed_by: list[str] = [] 

1132 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1133 dataset_composition, collections = self._standardize_query_dataset_args( 

1134 datasetType, 

1135 collections, 

1136 components, 

1137 mode="find_first" if findFirst else "find_all", 

1138 doomed_by=doomed_by, 

1139 ) 

1140 parent_results: list[queries.ParentDatasetQueryResults] = [] 

1141 for parent_dataset_type, components_for_parent in dataset_composition.items(): 

1142 # The full set of dimensions in the query is the combination of 

1143 # those needed for the DatasetType and those explicitly requested, 

1144 # if any. 

1145 dimension_names = set(parent_dataset_type.dimensions.names) 

1146 if dimensions is not None: 

1147 dimension_names.update(self.dimensions.extract(dimensions).names) 

1148 # Construct the summary structure needed to construct a 

1149 # QueryBuilder. 

1150 summary = queries.QuerySummary( 

1151 requested=DimensionGraph(self.dimensions, names=dimension_names), 

1152 data_id=data_id, 

1153 expression=where, 

1154 bind=bind, 

1155 defaults=self.defaults.dataId, 

1156 check=check, 

1157 datasets=[parent_dataset_type], 

1158 ) 

1159 builder = self._makeQueryBuilder(summary) 

1160 # Add the dataset subquery to the query, telling the QueryBuilder 

1161 # to include the rank of the selected collection in the results 

1162 # only if we need to findFirst. Note that if any of the 

1163 # collections are actually wildcard expressions, and 

1164 # findFirst=True, this will raise TypeError for us. 

1165 builder.joinDataset(parent_dataset_type, collections, isResult=True, findFirst=findFirst) 

1166 query = builder.finish() 

1167 parent_results.append( 

1168 queries.ParentDatasetQueryResults( 

1169 query, parent_dataset_type, components=components_for_parent 

1170 ) 

1171 ) 

1172 if not parent_results: 

1173 doomed_by.extend( 

1174 f"No registered dataset type matching {t!r} found, so no matching datasets can " 

1175 "exist in any collection." 

1176 for t in ensure_iterable(datasetType) 

1177 ) 

1178 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

1179 elif len(parent_results) == 1: 

1180 return parent_results[0] 

1181 else: 

1182 return queries.ChainedDatasetQueryResults(parent_results) 

1183 
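    # Illustrative sketch of a dataset query with a user expression, bind
    # values, and a find-first search; the dataset type, collection, and
    # cutoff value are hypothetical.
    #
    #     refs = registry.queryDatasets(
    #         "calexp",
    #         collections=["HSC/runs/example"],
    #         where="instrument = 'HSC' AND visit > cutoff",
    #         bind={"cutoff": 900},
    #         findFirst=True,
    #     )
    #     for ref in refs:
    #         ...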

1184 def queryDataIds( 

1185 self, 

1186 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], 

1187 *, 

1188 dataId: Optional[DataId] = None, 

1189 datasets: Any = None, 

1190 collections: Any = None, 

1191 where: str = "", 

1192 components: Optional[bool] = None, 

1193 bind: Optional[Mapping[str, Any]] = None, 

1194 check: bool = True, 

1195 **kwargs: Any, 

1196 ) -> queries.DataCoordinateQueryResults: 

1197 # Docstring inherited from lsst.daf.butler.registry.Registry 

1198 dimensions = ensure_iterable(dimensions) 

1199 requestedDimensions = self.dimensions.extract(dimensions) 

1200 doomed_by: list[str] = [] 

1201 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1202 dataset_composition, collections = self._standardize_query_dataset_args( 

1203 datasets, collections, components, doomed_by=doomed_by 

1204 ) 

1205 summary = queries.QuerySummary( 

1206 requested=requestedDimensions, 

1207 data_id=data_id, 

1208 expression=where, 

1209 bind=bind, 

1210 defaults=self.defaults.dataId, 

1211 check=check, 

1212 datasets=dataset_composition.keys(), 

1213 ) 

1214 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

1215 for datasetType in dataset_composition.keys(): 

1216 builder.joinDataset(datasetType, collections, isResult=False) 

1217 query = builder.finish() 

1218 

1219 return queries.DataCoordinateQueryResults(query) 

1220 
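    # Illustrative sketch of a data ID query constrained by dataset
    # existence; the dimensions, dataset type, and collection name are
    # hypothetical.
    #
    #     data_ids = registry.queryDataIds(
    #         ["visit", "detector"],
    #         datasets="raw",
    #         collections="DummyCam/raw/all",
    #         instrument="DummyCam",
    #     )
    #     for data_id in data_ids.expanded():
    #         ...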

1221 def queryDimensionRecords( 

1222 self, 

1223 element: Union[DimensionElement, str], 

1224 *, 

1225 dataId: Optional[DataId] = None, 

1226 datasets: Any = None, 

1227 collections: Any = None, 

1228 where: str = "", 

1229 components: Optional[bool] = None, 

1230 bind: Optional[Mapping[str, Any]] = None, 

1231 check: bool = True, 

1232 **kwargs: Any, 

1233 ) -> queries.DimensionRecordQueryResults: 

1234 # Docstring inherited from lsst.daf.butler.registry.Registry 

1235 if not isinstance(element, DimensionElement): 

1236 try: 

1237 element = self.dimensions[element] 

1238 except KeyError as e: 

1239 raise DimensionNameError( 

1240 f"No such dimension '{element}', available dimensions: " 

1241 + str(self.dimensions.getStaticElements()) 

1242 ) from e 

1243 doomed_by: list[str] = [] 

1244 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1245 dataset_composition, collections = self._standardize_query_dataset_args( 

1246 datasets, collections, components, doomed_by=doomed_by 

1247 ) 

1248 summary = queries.QuerySummary( 

1249 requested=element.graph, 

1250 data_id=data_id, 

1251 expression=where, 

1252 bind=bind, 

1253 defaults=self.defaults.dataId, 

1254 check=check, 

1255 datasets=dataset_composition.keys(), 

1256 ) 

1257 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

1258 for datasetType in dataset_composition.keys(): 

1259 builder.joinDataset(datasetType, collections, isResult=False) 

1260 query = builder.finish().with_record_columns(element) 

1261 return queries.DatabaseDimensionRecordQueryResults(query, element) 

1262 

1263 def queryDatasetAssociations( 

1264 self, 

1265 datasetType: Union[str, DatasetType], 

1266 collections: Any = ..., 

1267 *, 

1268 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1269 flattenChains: bool = False, 

1270 ) -> Iterator[DatasetAssociation]: 

1271 # Docstring inherited from lsst.daf.butler.registry.Registry 

1272 if collections is None: 

1273 if not self.defaults.collections: 

1274 raise NoDefaultCollectionError( 

1275 "No collections provided to queryDatasetAssociations, " 

1276 "and no defaults from registry construction." 

1277 ) 

1278 collections = self.defaults.collections 

1279 collections = CollectionWildcard.from_expression(collections) 

1280 backend = queries.SqlQueryBackend(self._db, self._managers) 

1281 parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False) 

1282 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan") 

1283 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

1284 for parent_collection_record in backend.resolve_collection_wildcard( 

1285 collections, 

1286 collection_types=frozenset(collectionTypes), 

1287 flatten_chains=flattenChains, 

1288 ): 

1289 # Resolve this possibly-chained collection into a list of 

1290 # non-CHAINED collections that actually hold datasets of this 

1291 # type. 

1292 candidate_collection_records = backend.resolve_dataset_collections( 

1293 parent_dataset_type, 

1294 CollectionWildcard.from_names([parent_collection_record.name]), 

1295 allow_calibration_collections=True, 

1296 governor_constraints={}, 

1297 ) 

1298 if not candidate_collection_records: 

1299 continue 

1300 with backend.context() as context: 

1301 relation = backend.make_dataset_query_relation( 

1302 parent_dataset_type, 

1303 candidate_collection_records, 

1304 columns={"dataset_id", "run", "timespan", "collection"}, 

1305 context=context, 

1306 ) 

1307 reader = queries.DatasetRefReader( 

1308 parent_dataset_type, 

1309 translate_collection=lambda k: self._managers.collections[k].name, 

1310 full=False, 

1311 ) 

1312 for row in context.fetch_iterable(relation): 

1313 ref = reader.read(row) 

1314 collection_record = self._managers.collections[row[collection_tag]] 

1315 if collection_record.type is CollectionType.CALIBRATION: 

1316 timespan = row[timespan_tag] 

1317 else: 

1318 # For backwards compatibility and (possibly?) user 

1319 # convenience we continue to define the timespan of a 

1320 # DatasetAssociation row for a non-CALIBRATION 

1321 # collection to be None rather than a fully unbounded 

1322 # timespan. 

1323 timespan = None 

1324 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan) 

1325 
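    # Illustrative sketch of listing how datasets of one type appear across
    # collections, including CALIBRATION validity ranges; the dataset type
    # and collection name are hypothetical.
    #
    #     for assoc in registry.queryDatasetAssociations(
    #         "bias", collections=["DummyCam/calib"]
    #     ):
    #         print(assoc.collection, assoc.ref.dataId, assoc.timespan)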

1326 storageClasses: StorageClassFactory 

1327 """All storage classes known to the registry (`StorageClassFactory`). 

1328 """