Coverage for python/lsst/daf/butler/registries/sql.py: 13%

497 statements  

coverage.py v6.5.0, created at 2023-02-16 02:54 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("SqlRegistry",) 

25 

26import contextlib 

27import logging 

28import warnings 

29from typing import ( 

30 TYPE_CHECKING, 

31 Any, 

32 Dict, 

33 Iterable, 

34 Iterator, 

35 List, 

36 Literal, 

37 Mapping, 

38 Optional, 

39 Sequence, 

40 Set, 

41 Union, 

42 cast, 

43) 

44 

45import sqlalchemy 

46from lsst.daf.relation import LeafRelation, Relation 

47from lsst.resources import ResourcePathExpression 

48from lsst.utils.iteration import ensure_iterable 

49 

50from ..core import ( 

51 Config, 

52 DataCoordinate, 

53 DataId, 

54 DatasetAssociation, 

55 DatasetColumnTag, 

56 DatasetId, 

57 DatasetRef, 

58 DatasetType, 

59 Dimension, 

60 DimensionConfig, 

61 DimensionElement, 

62 DimensionGraph, 

63 DimensionRecord, 

64 DimensionUniverse, 

65 NamedKeyMapping, 

66 NameLookupMapping, 

67 Progress, 

68 StorageClassFactory, 

69 Timespan, 

70 ddl, 

71) 

72from ..core.utils import transactional 

73from ..registry import ( 

74 ArgumentError, 

75 CollectionExpressionError, 

76 CollectionSummary, 

77 CollectionType, 

78 CollectionTypeError, 

79 ConflictingDefinitionError, 

80 DataIdValueError, 

81 DatasetTypeError, 

82 DimensionNameError, 

83 InconsistentDataIdError, 

84 NoDefaultCollectionError, 

85 OrphanedRecordError, 

86 Registry, 

87 RegistryConfig, 

88 RegistryDefaults, 

89 queries, 

90) 

91from ..registry.interfaces import ChainedCollectionRecord, DatasetIdFactory, DatasetIdGenEnum, RunRecord 

92from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes 

93from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard 

94 

95if TYPE_CHECKING: 95 ↛ 96   (line 95 didn't jump to line 96, because the condition on line 95 was never true)

96 from .._butlerConfig import ButlerConfig 

97 from ..registry.interfaces import CollectionRecord, Database, DatastoreRegistryBridgeManager 

98 

99 

100_LOG = logging.getLogger(__name__) 

101 

102 

103class SqlRegistry(Registry): 

104 """Registry implementation based on SQLAlchemy. 

105 

106 Parameters 

107 ---------- 

108 database : `Database` 

109 Database instance to store the Registry. 

110 defaults : `RegistryDefaults` 

111 Default collection search path and/or output `~CollectionType.RUN` 

112 collection. 

113 managers : `RegistryManagerInstances` 

114 All the managers required for this registry. 

115 """ 

116 

117 defaultConfigFile: Optional[str] = None 

118 """Path to configuration defaults. Accessed within the ``configs`` resource 

119 or relative to a search path. Can be `None` if no defaults are specified. 

120 """ 

121 

122 @classmethod 

123 def createFromConfig( 

124 cls, 

125 config: Optional[Union[RegistryConfig, str]] = None, 

126 dimensionConfig: Optional[Union[DimensionConfig, str]] = None, 

127 butlerRoot: Optional[ResourcePathExpression] = None, 

128 ) -> Registry: 

129 """Create registry database and return `SqlRegistry` instance. 

130 

131 This method initializes database contents; the database must be empty 

132 prior to calling this method. 

133 

134 Parameters 

135 ---------- 

136 config : `RegistryConfig` or `str`, optional 

137 Registry configuration; if missing, the default configuration will 

138 be loaded from ``registry.yaml``. 

139 dimensionConfig : `DimensionConfig` or `str`, optional 

140 Dimensions configuration; if missing, the default configuration 

141 will be loaded from ``dimensions.yaml``. 

142 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

143 Path to the repository root this `SqlRegistry` will manage. 

144 

145 Returns 

146 ------- 

147 registry : `SqlRegistry` 

148 A new `SqlRegistry` instance. 

149 """ 

150 config = cls.forceRegistryConfig(config) 

151 config.replaceRoot(butlerRoot) 

152 

153 if isinstance(dimensionConfig, str): 

154 dimensionConfig = DimensionConfig(dimensionConfig) 

155 elif dimensionConfig is None: 

156 dimensionConfig = DimensionConfig() 

157 elif not isinstance(dimensionConfig, DimensionConfig): 

158 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

159 

160 DatabaseClass = config.getDatabaseClass() 

161 database = DatabaseClass.fromUri( 

162 str(config.connectionString), origin=config.get("origin", 0), namespace=config.get("namespace") 

163 ) 

164 managerTypes = RegistryManagerTypes.fromConfig(config) 

165 managers = managerTypes.makeRepo(database, dimensionConfig) 

166 return cls(database, RegistryDefaults(), managers) 

167 
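
# Illustrative usage sketch (editor addition, not part of sql.py): creating a
# new, empty registry database from configuration.  The in-memory SQLite
# connection string is an assumption; any supported database URI works.
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import RegistryConfig

config = RegistryConfig()
config["db"] = "sqlite:///:memory:"
registry = SqlRegistry.createFromConfig(config)  # uses the default dimensions.yaml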

168 @classmethod 

169 def fromConfig( 

170 cls, 

171 config: Union[ButlerConfig, RegistryConfig, Config, str], 

172 butlerRoot: Optional[ResourcePathExpression] = None, 

173 writeable: bool = True, 

174 defaults: Optional[RegistryDefaults] = None, 

175 ) -> Registry: 

176 """Create `Registry` subclass instance from `config`. 

177 

178 Registry database must be initialized prior to calling this method. 

179 

180 Parameters 

181 ---------- 

182 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

183 Registry configuration. 

184 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

185 Path to the repository root this `Registry` will manage. 

186 writeable : `bool`, optional 

187 If `True` (default) create a read-write connection to the database. 

188 defaults : `RegistryDefaults`, optional 

189 Default collection search path and/or output `~CollectionType.RUN` 

190 collection. 

191 

192 Returns 

193 ------- 

194 registry : `SqlRegistry` (subclass) 

195 A new `SqlRegistry` subclass instance. 

196 """ 

197 config = cls.forceRegistryConfig(config) 

198 config.replaceRoot(butlerRoot) 

199 DatabaseClass = config.getDatabaseClass() 

200 database = DatabaseClass.fromUri( 

201 config.connectionString.render_as_string(hide_password=False), 

202 origin=config.get("origin", 0), 

203 namespace=config.get("namespace"), 

204 writeable=writeable, 

205 ) 

206 managerTypes = RegistryManagerTypes.fromConfig(config) 

207 with database.session(): 

208 managers = managerTypes.loadRepo(database) 

209 if defaults is None: 

210 defaults = RegistryDefaults() 

211 return cls(database, defaults, managers) 

212 
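
# Illustrative usage sketch (editor addition): opening an existing repository's
# registry read-only, with default collections supplied up front.  The config
# path and collection names are hypothetical.
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import RegistryDefaults

registry = SqlRegistry.fromConfig(
    "/repo/main/butler.yaml",
    writeable=False,
    defaults=RegistryDefaults(collections=["HSC/defaults"]),
)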

213 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances): 

214 self._db = database 

215 self._managers = managers 

216 self.storageClasses = StorageClassFactory() 

217 # Intentionally invoke property setter to initialize defaults. This 

218 # can only be done after most of the rest of Registry has already been 

219 # initialized, and must be done before the property getter is used. 

220 self.defaults = defaults 

221 # In the future DatasetIdFactory may become configurable and this 

222 # instance will need to be shared with datasets manager. 

223 self.datasetIdFactory = DatasetIdFactory() 

224 

225 def __str__(self) -> str: 

226 return str(self._db) 

227 

228 def __repr__(self) -> str: 

229 return f"SqlRegistry({self._db!r}, {self.dimensions!r})" 

230 

231 def isWriteable(self) -> bool: 

232 # Docstring inherited from lsst.daf.butler.registry.Registry 

233 return self._db.isWriteable() 

234 

235 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry: 

236 # Docstring inherited from lsst.daf.butler.registry.Registry 

237 if defaults is None: 

238 # No need to copy, because `RegistryDefaults` is immutable; we 

239 # effectively copy on write. 

240 defaults = self.defaults 

241 return type(self)(self._db, defaults, self._managers) 

242 

243 @property 

244 def dimensions(self) -> DimensionUniverse: 

245 # Docstring inherited from lsst.daf.butler.registry.Registry 

246 return self._managers.dimensions.universe 

247 

248 def refresh(self) -> None: 

249 # Docstring inherited from lsst.daf.butler.registry.Registry 

250 with self._db.transaction(): 

251 self._managers.refresh() 

252 

253 @contextlib.contextmanager 

254 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

255 # Docstring inherited from lsst.daf.butler.registry.Registry 

256 try: 

257 with self._db.transaction(savepoint=savepoint): 

258 yield 

259 except BaseException: 

260 # TODO: this clears the caches sometimes when we wouldn't actually 

261 # need to. Can we avoid that? 

262 self._managers.dimensions.clearCaches() 

263 raise 

264 

265 def resetConnectionPool(self) -> None: 

266 """Reset SQLAlchemy connection pool for `SqlRegistry` database. 

267 

268 This operation is useful when using the registry with fork-based 

269 multiprocessing. To use the registry across a fork boundary, one must 

270 ensure that there are no currently active connections (no session or 

271 transaction in progress) and that the connection pool has been reset 

272 using this method. It should be called by the child process immediately 

273 after the fork. 

274 """ 

275 self._db._engine.dispose() 

276 
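
# Illustrative usage sketch (editor addition): using the registry across a
# fork boundary.  The parent holds no open session or transaction at fork
# time, and the child resets the pool before its first query.
import multiprocessing

def _child_task(child_registry):
    child_registry.resetConnectionPool()  # drop connections inherited from the parent
    print(list(child_registry.queryCollections()))

ctx = multiprocessing.get_context("fork")
proc = ctx.Process(target=_child_task, args=(registry,))  # `registry` from a sketch above
proc.start()
proc.join()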

277 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

278 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

279 other data repository client. 

280 

281 Opaque table records can be added via `insertOpaqueData`, retrieved via 

282 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

283 

284 Parameters 

285 ---------- 

286 tableName : `str` 

287 Logical name of the opaque table. This may differ from the 

288 actual name used in the database by a prefix and/or suffix. 

289 spec : `ddl.TableSpec` 

290 Specification for the table to be added. 

291 """ 

292 self._managers.opaque.register(tableName, spec) 

293 

294 @transactional 

295 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

296 """Insert records into an opaque table. 

297 

298 Parameters 

299 ---------- 

300 tableName : `str` 

301 Logical name of the opaque table. Must match the name used in a 

302 previous call to `registerOpaqueTable`. 

303 data 

304 Each additional positional argument is a dictionary that represents 

305 a single row to be added. 

306 """ 

307 self._managers.opaque[tableName].insert(*data) 

308 

309 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]: 

310 """Retrieve records from an opaque table. 

311 

312 Parameters 

313 ---------- 

314 tableName : `str` 

315 Logical name of the opaque table. Must match the name used in a 

316 previous call to `registerOpaqueTable`. 

317 where 

318 Additional keyword arguments are interpreted as equality 

319 constraints that restrict the returned rows (combined with AND); 

320 keyword arguments are column names and values are the values they 

321 must have. 

322 

323 Yields 

324 ------ 

325 row : `dict` 

326 A dictionary representing a single result row. 

327 """ 

328 yield from self._managers.opaque[tableName].fetch(**where) 

329 

330 @transactional 

331 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

332 """Remove records from an opaque table. 

333 

334 Parameters 

335 ---------- 

336 tableName : `str` 

337 Logical name of the opaque table. Must match the name used in a 

338 previous call to `registerOpaqueTable`. 

339 where 

340 Additional keyword arguments are interpreted as equality 

341 constraints that restrict the deleted rows (combined with AND); 

342 keyword arguments are column names and values are the values they 

343 must have. 

344 """ 

345 self._managers.opaque[tableName].delete(where.keys(), where) 

346 
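
# Illustrative usage sketch (editor addition): a round trip through the opaque
# table API.  The table name and field layout are hypothetical.
import sqlalchemy
from lsst.daf.butler.core import ddl

spec = ddl.TableSpec(
    fields=[
        ddl.FieldSpec(name="path", dtype=sqlalchemy.String, length=256, primaryKey=True),
        ddl.FieldSpec(name="checksum", dtype=sqlalchemy.String, length=64, nullable=True),
    ]
)
registry.registerOpaqueTable("my_datastore_records", spec)
registry.insertOpaqueData("my_datastore_records", {"path": "a/b.fits", "checksum": "abc123"})
rows = list(registry.fetchOpaqueData("my_datastore_records", path="a/b.fits"))
registry.deleteOpaqueData("my_datastore_records", path="a/b.fits")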

347 def registerCollection( 

348 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None 

349 ) -> bool: 

350 # Docstring inherited from lsst.daf.butler.registry.Registry 

351 _, registered = self._managers.collections.register(name, type, doc=doc) 

352 return registered 

353 

354 def getCollectionType(self, name: str) -> CollectionType: 

355 # Docstring inherited from lsst.daf.butler.registry.Registry 

356 return self._managers.collections.find(name).type 

357 

358 def _get_collection_record(self, name: str) -> CollectionRecord: 

359 # Docstring inherited from lsst.daf.butler.registry.Registry 

360 return self._managers.collections.find(name) 

361 

362 def registerRun(self, name: str, doc: Optional[str] = None) -> bool: 

363 # Docstring inherited from lsst.daf.butler.registry.Registry 

364 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

365 return registered 

366 

367 @transactional 

368 def removeCollection(self, name: str) -> None: 

369 # Docstring inherited from lsst.daf.butler.registry.Registry 

370 self._managers.collections.remove(name) 

371 

372 def getCollectionChain(self, parent: str) -> tuple[str, ...]: 

373 # Docstring inherited from lsst.daf.butler.registry.Registry 

374 record = self._managers.collections.find(parent) 

375 if record.type is not CollectionType.CHAINED: 

376 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

377 assert isinstance(record, ChainedCollectionRecord) 

378 return record.children 

379 

380 @transactional 

381 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

382 # Docstring inherited from lsst.daf.butler.registry.Registry 

383 record = self._managers.collections.find(parent) 

384 if record.type is not CollectionType.CHAINED: 

385 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

386 assert isinstance(record, ChainedCollectionRecord) 

387 children = CollectionWildcard.from_expression(children).require_ordered() 

388 if children != record.children or flatten: 

389 record.update(self._managers.collections, children, flatten=flatten) 

390 
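
# Illustrative usage sketch (editor addition): assembling a CHAINED collection
# from two RUN collections.  Collection names are hypothetical.
from lsst.daf.butler.registry import CollectionType

registry.registerRun("HSC/runs/a")
registry.registerRun("HSC/runs/b")
registry.registerCollection("HSC/defaults", CollectionType.CHAINED)
registry.setCollectionChain("HSC/defaults", ["HSC/runs/a", "HSC/runs/b"])
assert registry.getCollectionChain("HSC/defaults") == ("HSC/runs/a", "HSC/runs/b")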

391 def getCollectionParentChains(self, collection: str) -> Set[str]: 

392 # Docstring inherited from lsst.daf.butler.registry.Registry 

393 return { 

394 record.name 

395 for record in self._managers.collections.getParentChains( 

396 self._managers.collections.find(collection).key 

397 ) 

398 } 

399 

400 def getCollectionDocumentation(self, collection: str) -> Optional[str]: 

401 # Docstring inherited from lsst.daf.butler.registry.Registry 

402 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

403 

404 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None: 

405 # Docstring inherited from lsst.daf.butler.registry.Registry 

406 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

407 

408 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

409 # Docstring inherited from lsst.daf.butler.registry.Registry 

410 record = self._managers.collections.find(collection) 

411 return self._managers.datasets.getCollectionSummary(record) 

412 

413 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

414 # Docstring inherited from lsst.daf.butler.registry.Registry 

415 _, inserted = self._managers.datasets.register(datasetType) 

416 return inserted 

417 

418 def removeDatasetType(self, name: str) -> None: 

419 # Docstring inherited from lsst.daf.butler.registry.Registry 

420 self._managers.datasets.remove(name) 

421 

422 def getDatasetType(self, name: str) -> DatasetType: 

423 # Docstring inherited from lsst.daf.butler.registry.Registry 

424 parent_name, component = DatasetType.splitDatasetTypeName(name) 

425 storage = self._managers.datasets[parent_name] 

426 if component is None: 

427 return storage.datasetType 

428 else: 

429 return storage.datasetType.makeComponentDatasetType(component) 

430 

431 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

432 # Docstring inherited from lsst.daf.butler.registry.Registry 

433 return self._managers.datasets.supportsIdGenerationMode(mode) 

434 

435 def findDataset( 

436 self, 

437 datasetType: Union[DatasetType, str], 

438 dataId: Optional[DataId] = None, 

439 *, 

440 collections: Any = None, 

441 timespan: Optional[Timespan] = None, 

442 **kwargs: Any, 

443 ) -> Optional[DatasetRef]: 

444 # Docstring inherited from lsst.daf.butler.registry.Registry 

445 if collections is None: 

446 if not self.defaults.collections: 

447 raise NoDefaultCollectionError( 

448 "No collections provided to findDataset, and no defaults from registry construction." 

449 ) 

450 collections = self.defaults.collections 

451 backend = queries.SqlQueryBackend(self._db, self._managers) 

452 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True) 

453 matched_collections = backend.resolve_collection_wildcard(collection_wildcard) 

454 parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard( 

455 datasetType, components_deprecated=False 

456 ) 

457 if len(components) > 1: 

458 raise DatasetTypeError( 

459 f"findDataset requires exactly one dataset type; got multiple components {components} " 

460 f"for parent dataset type {parent_dataset_type.name}." 

461 ) 

462 component = components[0] 

463 dataId = DataCoordinate.standardize( 

464 dataId, 

465 graph=parent_dataset_type.dimensions, 

466 universe=self.dimensions, 

467 defaults=self.defaults.dataId, 

468 **kwargs, 

469 ) 

470 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names} 

471 (filtered_collections,) = backend.filter_dataset_collections( 

472 [parent_dataset_type], 

473 matched_collections, 

474 governor_constraints=governor_constraints, 

475 ).values() 

476 if not filtered_collections: 

477 return None 

478 if timespan is None: 

479 filtered_collections = [ 

480 collection_record 

481 for collection_record in filtered_collections 

482 if collection_record.type is not CollectionType.CALIBRATION 

483 ] 

484 if filtered_collections: 

485 requested_columns = {"dataset_id", "run", "collection"} 

486 with backend.context() as context: 

487 predicate = context.make_data_coordinate_predicate( 

488 dataId.subset(parent_dataset_type.dimensions), full=False 

489 ) 

490 if timespan is not None: 

491 requested_columns.add("timespan") 

492 predicate = predicate.logical_and( 

493 context.make_timespan_overlap_predicate( 

494 DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan 

495 ) 

496 ) 

497 relation = backend.make_dataset_query_relation( 

498 parent_dataset_type, filtered_collections, requested_columns, context 

499 ).with_rows_satisfying(predicate) 

500 rows = list(context.fetch_iterable(relation)) 

501 else: 

502 rows = [] 

503 if not rows: 

504 return None 

505 elif len(rows) == 1: 

506 best_row = rows[0] 

507 else: 

508 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)} 

509 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

510 row_iter = iter(rows) 

511 best_row = next(row_iter) 

512 best_rank = rank_by_collection_key[best_row[collection_tag]] 

513 have_tie = False 

514 for row in row_iter: 

515 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank: 

516 best_row = row 

517 best_rank = rank 

518 have_tie = False 

519 elif rank == best_rank: 

520 have_tie = True 

521 assert timespan is not None, "Rank ties should be impossible given DB constraints." 

522 if have_tie: 

523 raise LookupError( 

524 f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections " 

525 f"{collection_wildcard.strings} with timespan {timespan}." 

526 ) 

527 reader = queries.DatasetRefReader( 

528 parent_dataset_type, 

529 translate_collection=lambda k: self._managers.collections[k].name, 

530 ) 

531 ref = reader.read(best_row, data_id=dataId) 

532 if component is not None: 

533 ref = ref.makeComponentRef(component) 

534 return ref 

535 
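
# Illustrative usage sketch (editor addition): find-first lookup of a single
# dataset.  The dataset type name, data ID values, and collections are
# hypothetical, and the dataset type must already be registered.
ref = registry.findDataset(
    "calexp",
    instrument="HSC", visit=903334, detector=16,
    collections=["HSC/runs/b", "HSC/runs/a"],  # searched in order; first match wins
)
if ref is None:
    print("no matching dataset in the searched collections")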

536 @transactional 

537 def insertDatasets( 

538 self, 

539 datasetType: Union[DatasetType, str], 

540 dataIds: Iterable[DataId], 

541 run: Optional[str] = None, 

542 expand: bool = True, 

543 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

544 ) -> List[DatasetRef]: 

545 # Docstring inherited from lsst.daf.butler.registry.Registry 

546 if isinstance(datasetType, DatasetType): 

547 storage = self._managers.datasets.find(datasetType.name) 

548 if storage is None: 

549 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

550 else: 

551 storage = self._managers.datasets.find(datasetType) 

552 if storage is None: 

553 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.") 

554 if run is None: 

555 if self.defaults.run is None: 

556 raise NoDefaultCollectionError( 

557 "No run provided to insertDatasets, and no default from registry construction." 

558 ) 

559 run = self.defaults.run 

560 runRecord = self._managers.collections.find(run) 

561 if runRecord.type is not CollectionType.RUN: 

562 raise CollectionTypeError( 

563 f"Given collection is of type {runRecord.type.name}; RUN collection required." 

564 ) 

565 assert isinstance(runRecord, RunRecord) 

566 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

567 if expand: 

568 expandedDataIds = [ 

569 self.expandDataId(dataId, graph=storage.datasetType.dimensions) 

570 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") 

571 ] 

572 else: 

573 expandedDataIds = [ 

574 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds 

575 ] 

576 try: 

577 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode)) 

578 if self._managers.obscore: 

579 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

580 self._managers.obscore.add_datasets(refs, context) 

581 except sqlalchemy.exc.IntegrityError as err: 

582 raise ConflictingDefinitionError( 

583 "A database constraint failure was triggered by inserting " 

584 f"one or more datasets of type {storage.datasetType} into " 

585 f"collection '{run}'. " 

586 "This probably means a dataset with the same data ID " 

587 "and dataset type already exists, but it may also mean a " 

588 "dimension row is missing." 

589 ) from err 

590 return refs 

591 
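
# Illustrative usage sketch (editor addition): registering a dataset type and
# inserting one dataset into a RUN collection.  The names, dimensions, and
# storage class are hypothetical, and the referenced dimension records
# (instrument/visit/detector) must already exist in the registry.
from lsst.daf.butler import DatasetType

dataset_type = DatasetType(
    "calexp",
    dimensions=["instrument", "visit", "detector"],
    storageClass="ExposureF",
    universe=registry.dimensions,
)
registry.registerDatasetType(dataset_type)
(ref,) = registry.insertDatasets(
    dataset_type,
    [{"instrument": "HSC", "visit": 903334, "detector": 16}],
    run="HSC/runs/a",
)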

592 @transactional 

593 def _importDatasets( 

594 self, 

595 datasets: Iterable[DatasetRef], 

596 expand: bool = True, 

597 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

598 reuseIds: bool = False, 

599 ) -> List[DatasetRef]: 

600 # Docstring inherited from lsst.daf.butler.registry.Registry 

601 datasets = list(datasets) 

602 if not datasets: 

603 # nothing to do 

604 return [] 

605 

606 # find dataset type 

607 datasetTypes = set(dataset.datasetType for dataset in datasets) 

608 if len(datasetTypes) != 1: 

609 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}") 

610 datasetType = datasetTypes.pop() 

611 

612 # get storage handler for this dataset type 

613 storage = self._managers.datasets.find(datasetType.name) 

614 if storage is None: 

615 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

616 

617 # find run name 

618 runs = set(dataset.run for dataset in datasets) 

619 if len(runs) != 1: 

620 raise ValueError(f"Multiple run names in input datasets: {runs}") 

621 run = runs.pop() 

622 if run is None: 

623 if self.defaults.run is None: 

624 raise NoDefaultCollectionError( 

625 "No run provided to ingestDatasets, and no default from registry construction." 

626 ) 

627 run = self.defaults.run 

628 

629 runRecord = self._managers.collections.find(run) 

630 if runRecord.type is not CollectionType.RUN: 

631 raise CollectionTypeError( 

632 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};" 

633 " RUN collection required." 

634 ) 

635 assert isinstance(runRecord, RunRecord) 

636 

637 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

638 if expand: 

639 expandedDatasets = [ 

640 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions)) 

641 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") 

642 ] 

643 else: 

644 expandedDatasets = [ 

645 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True) 

646 for dataset in datasets 

647 ] 

648 

649 try: 

650 refs = list(storage.import_(runRecord, expandedDatasets, idGenerationMode, reuseIds)) 

651 if self._managers.obscore: 

652 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

653 self._managers.obscore.add_datasets(refs, context) 

654 except sqlalchemy.exc.IntegrityError as err: 

655 raise ConflictingDefinitionError( 

656 "A database constraint failure was triggered by inserting " 

657 f"one or more datasets of type {storage.datasetType} into " 

658 f"collection '{run}'. " 

659 "This probably means a dataset with the same data ID " 

660 "and dataset type already exists, but it may also mean a " 

661 "dimension row is missing." 

662 ) from err 

663 return refs 

664 

665 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]: 

666 # Docstring inherited from lsst.daf.butler.registry.Registry 

667 return self._managers.datasets.getDatasetRef(id) 

668 

669 @transactional 

670 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

671 # Docstring inherited from lsst.daf.butler.registry.Registry 

672 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

673 for datasetType, refsForType in progress.iter_item_chunks( 

674 DatasetRef.groupByType(refs).items(), desc="Removing datasets by type" 

675 ): 

676 storage = self._managers.datasets[datasetType.name] 

677 try: 

678 storage.delete(refsForType) 

679 except sqlalchemy.exc.IntegrityError as err: 

680 raise OrphanedRecordError( 

681 "One or more datasets is still present in one or more Datastores." 

682 ) from err 

683 

684 @transactional 

685 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

686 # Docstring inherited from lsst.daf.butler.registry.Registry 

687 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

688 collectionRecord = self._managers.collections.find(collection) 

689 if collectionRecord.type is not CollectionType.TAGGED: 

690 raise CollectionTypeError( 

691 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED." 

692 ) 

693 for datasetType, refsForType in progress.iter_item_chunks( 

694 DatasetRef.groupByType(refs).items(), desc="Associating datasets by type" 

695 ): 

696 storage = self._managers.datasets[datasetType.name] 

697 try: 

698 storage.associate(collectionRecord, refsForType) 

699 if self._managers.obscore: 

700 # If a TAGGED collection is being monitored by the ObsCore 

701 # manager, we may need to save the dataset. 

702 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

703 self._managers.obscore.associate(refsForType, collectionRecord, context) 

704 except sqlalchemy.exc.IntegrityError as err: 

705 raise ConflictingDefinitionError( 

706 f"Constraint violation while associating dataset of type {datasetType.name} with " 

707 f"collection {collection}. This probably means that one or more datasets with the same " 

708 "dataset type and data ID already exist in the collection, but it may also indicate " 

709 "that the datasets do not exist." 

710 ) from err 

711 

712 @transactional 

713 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

714 # Docstring inherited from lsst.daf.butler.registry.Registry 

715 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

716 collectionRecord = self._managers.collections.find(collection) 

717 if collectionRecord.type is not CollectionType.TAGGED: 

718 raise CollectionTypeError( 

719 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED." 

720 ) 

721 for datasetType, refsForType in progress.iter_item_chunks( 

722 DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type" 

723 ): 

724 storage = self._managers.datasets[datasetType.name] 

725 storage.disassociate(collectionRecord, refsForType) 

726 if self._managers.obscore: 

727 self._managers.obscore.disassociate(refsForType, collectionRecord) 

728 

729 @transactional 

730 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

731 # Docstring inherited from lsst.daf.butler.registry.Registry 

732 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

733 collectionRecord = self._managers.collections.find(collection) 

734 for datasetType, refsForType in progress.iter_item_chunks( 

735 DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type" 

736 ): 

737 storage = self._managers.datasets[datasetType.name] 

738 storage.certify( 

739 collectionRecord, 

740 refsForType, 

741 timespan, 

742 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

743 ) 

744 

745 @transactional 

746 def decertify( 

747 self, 

748 collection: str, 

749 datasetType: Union[str, DatasetType], 

750 timespan: Timespan, 

751 *, 

752 dataIds: Optional[Iterable[DataId]] = None, 

753 ) -> None: 

754 # Docstring inherited from lsst.daf.butler.registry.Registry 

755 collectionRecord = self._managers.collections.find(collection) 

756 if isinstance(datasetType, str): 

757 storage = self._managers.datasets[datasetType] 

758 else: 

759 storage = self._managers.datasets[datasetType.name] 

760 standardizedDataIds = None 

761 if dataIds is not None: 

762 standardizedDataIds = [ 

763 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds 

764 ] 

765 storage.decertify( 

766 collectionRecord, 

767 timespan, 

768 dataIds=standardizedDataIds, 

769 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

770 ) 

771 

772 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

773 """Return an object that allows a new `Datastore` instance to 

774 communicate with this `Registry`. 

775 

776 Returns 

777 ------- 

778 manager : `DatastoreRegistryBridgeManager` 

779 Object that mediates communication between this `Registry` and its 

780 associated datastores. 

781 """ 

782 return self._managers.datastores 

783 

784 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

785 # Docstring inherited from lsst.daf.butler.registry.Registry 

786 return self._managers.datastores.findDatastores(ref) 

787 

788 def expandDataId( 

789 self, 

790 dataId: Optional[DataId] = None, 

791 *, 

792 graph: Optional[DimensionGraph] = None, 

793 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None, 

794 withDefaults: bool = True, 

795 **kwargs: Any, 

796 ) -> DataCoordinate: 

797 # Docstring inherited from lsst.daf.butler.registry.Registry 

798 if not withDefaults: 

799 defaults = None 

800 else: 

801 defaults = self.defaults.dataId 

802 try: 

803 standardized = DataCoordinate.standardize( 

804 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs 

805 ) 

806 except KeyError as exc: 

807 # This means either a kwarg has an unexpected name or a required 

808 # dimension is missing. 

809 raise DimensionNameError(str(exc)) from exc 

810 if standardized.hasRecords(): 

811 return standardized 

812 if records is None: 

813 records = {} 

814 elif isinstance(records, NamedKeyMapping): 

815 records = records.byName() 

816 else: 

817 records = dict(records) 

818 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

819 records.update(dataId.records.byName()) 

820 keys = standardized.byName() 

821 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

822 for element in standardized.graph.primaryKeyTraversalOrder: 

823 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

824 if record is ...: 

825 if isinstance(element, Dimension) and keys.get(element.name) is None: 

826 if element in standardized.graph.required: 

827 raise DimensionNameError( 

828 f"No value or null value for required dimension {element.name}." 

829 ) 

830 keys[element.name] = None 

831 record = None 

832 else: 

833 storage = self._managers.dimensions[element] 

834 record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context) 

835 records[element.name] = record 

836 if record is not None: 

837 for d in element.implied: 

838 value = getattr(record, d.name) 

839 if keys.setdefault(d.name, value) != value: 

840 raise InconsistentDataIdError( 

841 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

842 f"but {element.name} implies {d.name}={value!r}." 

843 ) 

844 else: 

845 if element in standardized.graph.required: 

846 raise DataIdValueError( 

847 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

848 ) 

849 if element.alwaysJoin: 

850 raise InconsistentDataIdError( 

851 f"Could not fetch record for element {element.name} via keys {keys}, ", 

852 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

853 "related.", 

854 ) 

855 for d in element.implied: 

856 keys.setdefault(d.name, None) 

857 records.setdefault(d.name, None) 

858 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records) 

859 
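
# Illustrative usage sketch (editor addition): expanding a minimal data ID so
# that implied dimension values and dimension records become available.  The
# key values and record field names are assumptions.
data_id = registry.expandDataId(instrument="HSC", visit=903334, detector=16)
print(data_id["physical_filter"])               # implied value filled in from records
print(data_id.records["visit"].exposure_time)   # attached DimensionRecord field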

860 def insertDimensionData( 

861 self, 

862 element: Union[DimensionElement, str], 

863 *data: Union[Mapping[str, Any], DimensionRecord], 

864 conform: bool = True, 

865 replace: bool = False, 

866 skip_existing: bool = False, 

867 ) -> None: 

868 # Docstring inherited from lsst.daf.butler.registry.Registry 

869 if conform: 

870 if isinstance(element, str): 

871 element = self.dimensions[element] 

872 records = [ 

873 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data 

874 ] 

875 else: 

876 # Ignore typing since caller said to trust them with conform=False. 

877 records = data # type: ignore 

878 storage = self._managers.dimensions[element] 

879 storage.insert(*records, replace=replace, skip_existing=skip_existing) 

880 

881 def syncDimensionData( 

882 self, 

883 element: Union[DimensionElement, str], 

884 row: Union[Mapping[str, Any], DimensionRecord], 

885 conform: bool = True, 

886 update: bool = False, 

887 ) -> Union[bool, Dict[str, Any]]: 

888 # Docstring inherited from lsst.daf.butler.registry.Registry 

889 if conform: 

890 if isinstance(element, str): 

891 element = self.dimensions[element] 

892 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

893 else: 

894 # Ignore typing since caller said to trust them with conform=False. 

895 record = row # type: ignore 

896 storage = self._managers.dimensions[element] 

897 return storage.sync(record, update=update) 

898 
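
# Illustrative usage sketch (editor addition): populating dimension records
# before inserting datasets.  The record values are hypothetical; the field
# names follow the default dimension universe.
registry.insertDimensionData(
    "instrument",
    {"name": "HSC", "class_name": "lsst.obs.subaru.HyperSuprimeCam",
     "detector_max": 200, "visit_max": 99_999_999, "exposure_max": 99_999_999},
)
# syncDimensionData is idempotent: it inserts the record if needed and reports
# whether anything changed.
registry.syncDimensionData(
    "physical_filter", {"instrument": "HSC", "name": "HSC-R", "band": "r"}
)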

899 def queryDatasetTypes( 

900 self, 

901 expression: Any = ..., 

902 *, 

903 components: Optional[bool] = None, 

904 missing: Optional[List[str]] = None, 

905 ) -> Iterable[DatasetType]: 

906 # Docstring inherited from lsst.daf.butler.registry.Registry 

907 wildcard = DatasetTypeWildcard.from_expression(expression) 

908 composition_dict = self._managers.datasets.resolve_wildcard( 

909 wildcard, 

910 components=components, 

911 missing=missing, 

912 ) 

913 result: list[DatasetType] = [] 

914 for parent_dataset_type, components_for_parent in composition_dict.items(): 

915 result.extend( 

916 parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type 

917 for c in components_for_parent 

918 ) 

919 return result 

920 

921 def queryCollections( 

922 self, 

923 expression: Any = ..., 

924 datasetType: Optional[DatasetType] = None, 

925 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(), 

926 flattenChains: bool = False, 

927 includeChains: Optional[bool] = None, 

928 ) -> Sequence[str]: 

929 # Docstring inherited from lsst.daf.butler.registry.Registry 

930 

931 # Right now the datasetType argument is completely ignored, but that 

932 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

933 # ticket will take care of that. 

934 try: 

935 wildcard = CollectionWildcard.from_expression(expression) 

936 except TypeError as exc: 

937 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc 

938 collectionTypes = ensure_iterable(collectionTypes) 

939 return [ 

940 record.name 

941 for record in self._managers.collections.resolve_wildcard( 

942 wildcard, 

943 collection_types=frozenset(collectionTypes), 

944 flatten_chains=flattenChains, 

945 include_chains=includeChains, 

946 ) 

947 ] 

948 

949 def _makeQueryBuilder( 

950 self, 

951 summary: queries.QuerySummary, 

952 doomed_by: Iterable[str] = (), 

953 ) -> queries.QueryBuilder: 

954 """Return a `QueryBuilder` instance capable of constructing and 

955 managing more complex queries than those obtainable via `Registry` 

956 interfaces. 

957 

958 This is an advanced interface; downstream code should prefer 

959 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

960 are sufficient. 

961 

962 Parameters 

963 ---------- 

964 summary : `queries.QuerySummary` 

965 Object describing and categorizing the full set of dimensions that 

966 will be included in the query. 

967 doomed_by : `Iterable` of `str`, optional 

968 A list of diagnostic messages that indicate why the query is going 

969 to yield no results and should not even be executed. If an empty 

970 container (default) the query will be executed unless other code 

971 determines that it is doomed. 

972 

973 Returns 

974 ------- 

975 builder : `queries.QueryBuilder` 

976 Object that can be used to construct and perform advanced queries. 

977 """ 

978 doomed_by = list(doomed_by) 

979 backend = queries.SqlQueryBackend(self._db, self._managers) 

980 context = backend.context() 

981 relation: Relation | None = None 

982 if doomed_by: 

983 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by) 

984 return queries.QueryBuilder( 

985 summary, 

986 backend=backend, 

987 context=context, 

988 relation=relation, 

989 ) 

990 

991 def _standardize_query_data_id_args( 

992 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any 

993 ) -> DataCoordinate: 

994 """Preprocess the data ID arguments passed to query* methods. 

995 

996 Parameters 

997 ---------- 

998 data_id : `DataId` or `None` 

999 Data ID that constrains the query results. 

1000 doomed_by : `list` [ `str` ] 

1001 List to append messages indicating why the query is doomed to 

1002 yield no results. 

1003 **kwargs 

1004 Additional data ID key-value pairs, extending and overriding 

1005 ``data_id``. 

1006 

1007 Returns 

1008 ------- 

1009 data_id : `DataCoordinate` 

1010 Standardized data ID. Will be fully expanded unless expansion 

1011 fails, in which case a message will be appended to ``doomed_by`` 

1012 on return. 

1013 """ 

1014 try: 

1015 return self.expandDataId(data_id, **kwargs) 

1016 except DataIdValueError as err: 

1017 doomed_by.append(str(err)) 

1018 return DataCoordinate.standardize( 

1019 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId 

1020 ) 

1021 

1022 def _standardize_query_dataset_args( 

1023 self, 

1024 datasets: Any, 

1025 collections: Any, 

1026 components: bool | None, 

1027 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain", 

1028 *, 

1029 doomed_by: list[str], 

1030 ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]: 

1031 """Preprocess dataset arguments passed to query* methods. 

1032 

1033 Parameters 

1034 ---------- 

1035 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these 

1036 Expression identifying dataset types. See `queryDatasetTypes` for 

1037 details. 

1038 collections : `str`, `re.Pattern`, or iterable of these 

1039 Expression identifying collections to be searched. See 

1040 `queryCollections` for details. 

1041 components : `bool`, optional 

1042 If `True`, apply all expression patterns to component dataset type 

1043 names as well. If `False`, never apply patterns to components. 

1044 If `None` (default), apply patterns to components only if their 

1045 parent datasets were not matched by the expression. 

1046 Fully-specified component datasets (`str` or `DatasetType` 

1047 instances) are always included. 

1048 

1049 Values other than `False` are deprecated, and only `False` will be 

1050 supported after v26. After v27 this argument will be removed 

1051 entirely. 

1052 mode : `str`, optional 

1053 The way in which datasets are being used in this query; one of: 

1054 

1055 - "find_first": this is a query for the first dataset in an 

1056 ordered list of collections. Prohibits collection wildcards, 

1057 but permits dataset type wildcards. 

1058 

1059 - "find_all": this is a query for all datasets in all matched 

1060 collections. Permits collection and dataset type wildcards. 

1061 

1062 - "constrain": this is a query for something other than datasets, 

1063 with results constrained by dataset existence. Permits 

1064 collection wildcards and prohibits ``...`` as a dataset type 

1065 wildcard. 

1066 doomed_by : `list` [ `str` ] 

1067 List to append messages indicating why the query is doomed to 

1068 yield no results. 

1069 

1070 Returns 

1071 ------- 

1072 composition : `dict` [ `DatasetType`, `list` [ `str` or `None` ] ] 

1073 Dictionary mapping parent dataset type to `list` of components 

1074 matched for that dataset type (or `None` for the parent itself). 

1075 collections : `CollectionWildcard` 

1076 Processed collection expression. 

1077 """ 

1078 composition: dict[DatasetType, list[str | None]] = {} 

1079 if datasets is not None: 

1080 if not collections: 

1081 if not self.defaults.collections: 

1082 raise NoDefaultCollectionError("No collections, and no registry default collections.") 

1083 collections = self.defaults.collections 

1084 else: 

1085 collections = CollectionWildcard.from_expression(collections) 

1086 if mode == "find_first" and collections.patterns: 

1087 raise TypeError( 

1088 f"Collection pattern(s) {collections.patterns} not allowed in this context." 

1089 ) 

1090 missing: list[str] = [] 

1091 composition = self._managers.datasets.resolve_wildcard( 

1092 datasets, components=components, missing=missing, explicit_only=(mode == "constrain") 

1093 ) 

1094 if missing and mode == "constrain": 

1095 # After v26 this should raise MissingDatasetTypeError, to be 

1096 # implemented on DM-36303. 

1097 warnings.warn( 

1098 f"Dataset type(s) {missing} are not registered; this will be an error after v26.", 

1099 FutureWarning, 

1100 ) 

1101 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing) 

1102 elif collections: 

1103 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") 

1104 return composition, collections 

1105 

1106 def queryDatasets( 

1107 self, 

1108 datasetType: Any, 

1109 *, 

1110 collections: Any = None, 

1111 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

1112 dataId: Optional[DataId] = None, 

1113 where: str = "", 

1114 findFirst: bool = False, 

1115 components: Optional[bool] = None, 

1116 bind: Optional[Mapping[str, Any]] = None, 

1117 check: bool = True, 

1118 **kwargs: Any, 

1119 ) -> queries.DatasetQueryResults: 

1120 # Docstring inherited from lsst.daf.butler.registry.Registry 

1121 doomed_by: list[str] = [] 

1122 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1123 dataset_composition, collections = self._standardize_query_dataset_args( 

1124 datasetType, 

1125 collections, 

1126 components, 

1127 mode="find_first" if findFirst else "find_all", 

1128 doomed_by=doomed_by, 

1129 ) 

1130 parent_results: list[queries.ParentDatasetQueryResults] = [] 

1131 for parent_dataset_type, components_for_parent in dataset_composition.items(): 

1132 # The full set of dimensions in the query is the combination of 

1133 # those needed for the DatasetType and those explicitly requested, 

1134 # if any. 

1135 dimension_names = set(parent_dataset_type.dimensions.names) 

1136 if dimensions is not None: 

1137 dimension_names.update(self.dimensions.extract(dimensions).names) 

1138 # Construct the summary structure needed to construct a 

1139 # QueryBuilder. 

1140 summary = queries.QuerySummary( 

1141 requested=DimensionGraph(self.dimensions, names=dimension_names), 

1142 data_id=data_id, 

1143 expression=where, 

1144 bind=bind, 

1145 defaults=self.defaults.dataId, 

1146 check=check, 

1147 datasets=[parent_dataset_type], 

1148 ) 

1149 builder = self._makeQueryBuilder(summary) 

1150 # Add the dataset subquery to the query, telling the QueryBuilder 

1151 # to include the rank of the selected collection in the results 

1152 # only if we need to findFirst. Note that if any of the 

1153 # collections are actually wildcard expressions, and 

1154 # findFirst=True, this will raise TypeError for us. 

1155 builder.joinDataset(parent_dataset_type, collections, isResult=True, findFirst=findFirst) 

1156 query = builder.finish() 

1157 parent_results.append( 

1158 queries.ParentDatasetQueryResults( 

1159 query, parent_dataset_type, components=components_for_parent 

1160 ) 

1161 ) 

1162 if not parent_results: 

1163 doomed_by.extend( 

1164 f"No registered dataset type matching {t!r} found, so no matching datasets can " 

1165 "exist in any collection." 

1166 for t in ensure_iterable(datasetType) 

1167 ) 

1168 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

1169 elif len(parent_results) == 1: 

1170 return parent_results[0] 

1171 else: 

1172 return queries.ChainedDatasetQueryResults(parent_results) 

1173 
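
# Illustrative usage sketch (editor addition): querying datasets with a data ID
# constraint and a string expression.  The names, values, and expression
# columns are hypothetical.
refs = registry.queryDatasets(
    "calexp",
    collections=["HSC/runs/a"],
    where="detector.purpose = 'SCIENCE' AND visit > 900000",
    instrument="HSC",
)
for ref in refs:
    print(ref.dataId, ref.run)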

1174 def queryDataIds( 

1175 self, 

1176 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], 

1177 *, 

1178 dataId: Optional[DataId] = None, 

1179 datasets: Any = None, 

1180 collections: Any = None, 

1181 where: str = "", 

1182 components: Optional[bool] = None, 

1183 bind: Optional[Mapping[str, Any]] = None, 

1184 check: bool = True, 

1185 **kwargs: Any, 

1186 ) -> queries.DataCoordinateQueryResults: 

1187 # Docstring inherited from lsst.daf.butler.registry.Registry 

1188 dimensions = ensure_iterable(dimensions) 

1189 requestedDimensions = self.dimensions.extract(dimensions) 

1190 doomed_by: list[str] = [] 

1191 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1192 dataset_composition, collections = self._standardize_query_dataset_args( 

1193 datasets, collections, components, doomed_by=doomed_by 

1194 ) 

1195 summary = queries.QuerySummary( 

1196 requested=requestedDimensions, 

1197 data_id=data_id, 

1198 expression=where, 

1199 bind=bind, 

1200 defaults=self.defaults.dataId, 

1201 check=check, 

1202 datasets=dataset_composition.keys(), 

1203 ) 

1204 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

1205 for datasetType in dataset_composition.keys(): 

1206 builder.joinDataset(datasetType, collections, isResult=False) 

1207 query = builder.finish() 

1208 

1209 return queries.DataCoordinateQueryResults(query) 

1210 
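
# Illustrative usage sketch (editor addition): querying data IDs for a set of
# dimensions, constrained by the existence of a dataset in a collection.  The
# dataset type and collection names are hypothetical.
data_ids = registry.queryDataIds(
    ["visit", "detector"],
    datasets="raw",
    collections=["HSC/raw/all"],
    instrument="HSC",
)
print(data_ids.count(), "matching data IDs")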

1211 def queryDimensionRecords( 

1212 self, 

1213 element: Union[DimensionElement, str], 

1214 *, 

1215 dataId: Optional[DataId] = None, 

1216 datasets: Any = None, 

1217 collections: Any = None, 

1218 where: str = "", 

1219 components: Optional[bool] = None, 

1220 bind: Optional[Mapping[str, Any]] = None, 

1221 check: bool = True, 

1222 **kwargs: Any, 

1223 ) -> queries.DimensionRecordQueryResults: 

1224 # Docstring inherited from lsst.daf.butler.registry.Registry 

1225 if not isinstance(element, DimensionElement): 

1226 try: 

1227 element = self.dimensions[element] 

1228 except KeyError as e: 

1229 raise DimensionNameError( 

1230 f"No such dimension '{element}', available dimensions: " 

1231 + str(self.dimensions.getStaticElements()) 

1232 ) from e 

1233 doomed_by: list[str] = [] 

1234 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1235 dataset_composition, collections = self._standardize_query_dataset_args( 

1236 datasets, collections, components, doomed_by=doomed_by 

1237 ) 

1238 summary = queries.QuerySummary( 

1239 requested=element.graph, 

1240 data_id=data_id, 

1241 expression=where, 

1242 bind=bind, 

1243 defaults=self.defaults.dataId, 

1244 check=check, 

1245 datasets=dataset_composition.keys(), 

1246 ) 

1247 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

1248 for datasetType in dataset_composition.keys(): 

1249 builder.joinDataset(datasetType, collections, isResult=False) 

1250 query = builder.finish().with_record_columns(element) 

1251 return queries.DatabaseDimensionRecordQueryResults(query, element) 

1252 

1253 def queryDatasetAssociations( 

1254 self, 

1255 datasetType: Union[str, DatasetType], 

1256 collections: Any = ..., 

1257 *, 

1258 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1259 flattenChains: bool = False, 

1260 ) -> Iterator[DatasetAssociation]: 

1261 # Docstring inherited from lsst.daf.butler.registry.Registry 

1262 if collections is None: 

1263 if not self.defaults.collections: 

1264 raise NoDefaultCollectionError( 

1265 "No collections provided to queryDatasetAssociations, " 

1266 "and no defaults from registry construction." 

1267 ) 

1268 collections = self.defaults.collections 

1269 collections = CollectionWildcard.from_expression(collections) 

1270 backend = queries.SqlQueryBackend(self._db, self._managers) 

1271 parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False) 

1272 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan") 

1273 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

1274 for parent_collection_record in backend.resolve_collection_wildcard( 

1275 collections, 

1276 collection_types=frozenset(collectionTypes), 

1277 flatten_chains=flattenChains, 

1278 ): 

1279 # Resolve this possibly-chained collection into a list of 

1280 # non-CHAINED collections that actually hold datasets of this 

1281 # type. 

1282 candidate_collection_records = backend.resolve_dataset_collections( 

1283 parent_dataset_type, 

1284 CollectionWildcard.from_names([parent_collection_record.name]), 

1285 allow_calibration_collections=True, 

1286 governor_constraints={}, 

1287 ) 

1288 if not candidate_collection_records: 

1289 continue 

1290 with backend.context() as context: 

1291 relation = backend.make_dataset_query_relation( 

1292 parent_dataset_type, 

1293 candidate_collection_records, 

1294 columns={"dataset_id", "run", "timespan", "collection"}, 

1295 context=context, 

1296 ) 

1297 reader = queries.DatasetRefReader( 

1298 parent_dataset_type, 

1299 translate_collection=lambda k: self._managers.collections[k].name, 

1300 full=False, 

1301 ) 

1302 for row in context.fetch_iterable(relation): 

1303 ref = reader.read(row) 

1304 collection_record = self._managers.collections[row[collection_tag]] 

1305 if collection_record.type is CollectionType.CALIBRATION: 

1306 timespan = row[timespan_tag] 

1307 else: 

1308 # For backwards compatibility and (possibly?) user 

1309 # convenience we continue to define the timespan of a 

1310 # DatasetAssociation row for a non-CALIBRATION 

1311 # collection to be None rather than a fully unbounded 

1312 # timespan. 

1313 timespan = None 

1314 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan) 

1315 
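
# Illustrative usage sketch (editor addition): listing dataset/collection
# associations, including the validity timespans recorded for CALIBRATION
# collections.  The dataset type name is hypothetical; `...` means "all
# collections", as in the signature above.
for assoc in registry.queryDatasetAssociations("bias", collections=...):
    print(assoc.collection, assoc.ref.dataId, assoc.timespan)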

1316 storageClasses: StorageClassFactory 

1317 """All storage classes known to the registry (`StorageClassFactory`). 

1318 """