Coverage for python/lsst/daf/butler/registries/sql.py: 12%

513 statements  

coverage.py v7.2.7, created at 2023-06-15 09:13 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("SqlRegistry",)

import contextlib
import logging
import warnings
from collections.abc import Iterable, Iterator, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Literal, cast

import sqlalchemy
from lsst.daf.relation import LeafRelation, Relation
from lsst.resources import ResourcePathExpression
from lsst.utils.iteration import ensure_iterable

from ..core import (
    Config,
    DataCoordinate,
    DataId,
    DatasetAssociation,
    DatasetColumnTag,
    DatasetId,
    DatasetIdFactory,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionConfig,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    NamedKeyMapping,
    NameLookupMapping,
    Progress,
    StorageClassFactory,
    Timespan,
    ddl,
)
from ..core.utils import transactional
from ..registry import (
    ArgumentError,
    CollectionExpressionError,
    CollectionSummary,
    CollectionType,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    DimensionNameError,
    InconsistentDataIdError,
    NoDefaultCollectionError,
    OrphanedRecordError,
    Registry,
    RegistryConfig,
    RegistryConsistencyError,
    RegistryDefaults,
    queries,
)
from ..registry.interfaces import ChainedCollectionRecord, RunRecord
from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes
from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard

if TYPE_CHECKING:
    from .._butlerConfig import ButlerConfig
    from ..registry._registry import CollectionArgType
    from ..registry.interfaces import (
        CollectionRecord,
        Database,
        DatastoreRegistryBridgeManager,
        ObsCoreTableManager,
    )


_LOG = logging.getLogger(__name__)


class SqlRegistry(Registry):
    """Registry implementation based on SQLAlchemy.

    Parameters
    ----------
    database : `Database`
        Database instance to store Registry.
    defaults : `RegistryDefaults`
        Default collection search path and/or output `~CollectionType.RUN`
        collection.
    managers : `RegistryManagerInstances`
        All the managers required for this registry.
    """

    defaultConfigFile: str | None = None
    """Path to configuration defaults. Accessed within the ``configs`` resource
    or relative to a search path. Can be None if no defaults specified.
    """

    @classmethod
    def createFromConfig(
        cls,
        config: RegistryConfig | str | None = None,
        dimensionConfig: DimensionConfig | str | None = None,
        butlerRoot: ResourcePathExpression | None = None,
    ) -> Registry:
        """Create registry database and return `SqlRegistry` instance.

        This method initializes the database contents; the database must be
        empty prior to calling this method.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration; if missing, the default configuration will
            be loaded from registry.yaml.
        dimensionConfig : `DimensionConfig` or `str`, optional
            Dimensions configuration; if missing, the default configuration
            will be loaded from dimensions.yaml.
        butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
            Path to the repository root this `SqlRegistry` will manage.

        Returns
        -------
        registry : `SqlRegistry`
            A new `SqlRegistry` instance.
        """
        config = cls.forceRegistryConfig(config)
        config.replaceRoot(butlerRoot)

        if isinstance(dimensionConfig, str):
            dimensionConfig = DimensionConfig(dimensionConfig)
        elif dimensionConfig is None:
            dimensionConfig = DimensionConfig()
        elif not isinstance(dimensionConfig, DimensionConfig):
            raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}")

        DatabaseClass = config.getDatabaseClass()
        database = DatabaseClass.fromUri(
            config.connectionString, origin=config.get("origin", 0), namespace=config.get("namespace")
        )
        managerTypes = RegistryManagerTypes.fromConfig(config)
        managers = managerTypes.makeRepo(database, dimensionConfig)
        return cls(database, RegistryDefaults(), managers)
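    # Usage sketch for `createFromConfig` (illustrative; the "db" connection
    # string and the in-memory SQLite URI are hypothetical values, not part of
    # this module):
    #
    #     config = RegistryConfig()
    #     config["db"] = "sqlite:///:memory:"
    #     registry = SqlRegistry.createFromConfig(config, dimensionConfig=DimensionConfig())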

    @classmethod
    def fromConfig(
        cls,
        config: ButlerConfig | RegistryConfig | Config | str,
        butlerRoot: ResourcePathExpression | None = None,
        writeable: bool = True,
        defaults: RegistryDefaults | None = None,
    ) -> Registry:
        """Create `Registry` subclass instance from `config`.

        The registry database must be initialized prior to calling this
        method.

        Parameters
        ----------
        config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
            Registry configuration.
        butlerRoot : `lsst.resources.ResourcePathExpression`, optional
            Path to the repository root this `Registry` will manage.
        writeable : `bool`, optional
            If `True` (default) create a read-write connection to the database.
        defaults : `RegistryDefaults`, optional
            Default collection search path and/or output `~CollectionType.RUN`
            collection.

        Returns
        -------
        registry : `SqlRegistry` (subclass)
            A new `SqlRegistry` subclass instance.
        """
        config = cls.forceRegistryConfig(config)
        config.replaceRoot(butlerRoot)
        DatabaseClass = config.getDatabaseClass()
        database = DatabaseClass.fromUri(
            config.connectionString,
            origin=config.get("origin", 0),
            namespace=config.get("namespace"),
            writeable=writeable,
        )
        managerTypes = RegistryManagerTypes.fromConfig(config)
        with database.session():
            managers = managerTypes.loadRepo(database)
        if defaults is None:
            defaults = RegistryDefaults()
        return cls(database, defaults, managers)
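    # Usage sketch for `fromConfig` (illustrative; the config path and
    # collection name are hypothetical): opening an existing repository
    # read-only with a default collection search path.
    #
    #     defaults = RegistryDefaults(collections=["HSC/defaults"])
    #     registry = SqlRegistry.fromConfig(
    #         "/repo/main/butler.yaml", writeable=False, defaults=defaults
    #     )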

    def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances):
        self._db = database
        self._managers = managers
        self.storageClasses = StorageClassFactory()
        # Intentionally invoke property setter to initialize defaults. This
        # can only be done after most of the rest of Registry has already been
        # initialized, and must be done before the property getter is used.
        self.defaults = defaults
        # In the future DatasetIdFactory may become configurable and this
        # instance will need to be shared with datasets manager.
        self.datasetIdFactory = DatasetIdFactory()

    def __str__(self) -> str:
        return str(self._db)

    def __repr__(self) -> str:
        return f"SqlRegistry({self._db!r}, {self.dimensions!r})"

    def isWriteable(self) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._db.isWriteable()

    def copy(self, defaults: RegistryDefaults | None = None) -> Registry:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if defaults is None:
            # No need to copy, because `RegistryDefaults` is immutable; we
            # effectively copy on write.
            defaults = self.defaults
        return type(self)(self._db, defaults, self._managers)

    @property
    def dimensions(self) -> DimensionUniverse:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.dimensions.universe

    def refresh(self) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        with self._db.transaction():
            self._managers.refresh()

    @contextlib.contextmanager
    def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        try:
            with self._db.transaction(savepoint=savepoint):
                yield
        except BaseException:
            # TODO: this clears the caches sometimes when we wouldn't actually
            # need to. Can we avoid that?
            self._managers.dimensions.clearCaches()
            raise
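    # Usage sketch for `transaction` (illustrative; collection names are
    # hypothetical): grouping several registry writes so they are committed or
    # rolled back together.
    #
    #     with registry.transaction(savepoint=True):
    #         registry.registerRun("my_run")
    #         registry.registerCollection("my_tagged", CollectionType.TAGGED)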

    def resetConnectionPool(self) -> None:
        """Reset SQLAlchemy connection pool for `SqlRegistry` database.

        This operation is useful when using the registry with fork-based
        multiprocessing. To use the registry across a fork boundary one has to
        make sure that there are no currently active connections (no session
        or transaction is in progress) and that the connection pool is reset
        using this method. This method should be called by the child process
        immediately after the fork.
        """
        self._db._engine.dispose()

    def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
        """Add an opaque (to the `Registry`) table for use by a `Datastore` or
        other data repository client.

        Opaque table records can be added via `insertOpaqueData`, retrieved via
        `fetchOpaqueData`, and removed via `deleteOpaqueData`.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. This may differ from the
            actual name used in the database by a prefix and/or suffix.
        spec : `ddl.TableSpec`
            Specification for the table to be added.
        """
        self._managers.opaque.register(tableName, spec)

    @transactional
    def insertOpaqueData(self, tableName: str, *data: dict) -> None:
        """Insert records into an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        data
            Each additional positional argument is a dictionary that represents
            a single row to be added.
        """
        self._managers.opaque[tableName].insert(*data)

    def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]:
        """Retrieve records from an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        where
            Additional keyword arguments are interpreted as equality
            constraints that restrict the returned rows (combined with AND);
            keyword arguments are column names and values are the values they
            must have.

        Yields
        ------
        row : `dict`
            A dictionary representing a single result row.
        """
        yield from self._managers.opaque[tableName].fetch(**where)

    @transactional
    def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
        """Remove records from an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        where
            Additional keyword arguments are interpreted as equality
            constraints that restrict the deleted rows (combined with AND);
            keyword arguments are column names and values are the values they
            must have.
        """
        self._managers.opaque[tableName].delete(where.keys(), where)
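    # Usage sketch for the opaque-table methods (illustrative; the table name,
    # field specification, and row values are hypothetical, and the
    # ddl.FieldSpec arguments are assumptions about the daf_butler ddl module):
    #
    #     spec = ddl.TableSpec(
    #         fields=[
    #             ddl.FieldSpec("path", dtype=sqlalchemy.String, length=256, primaryKey=True),
    #             ddl.FieldSpec("checksum", dtype=sqlalchemy.String, length=64),
    #         ]
    #     )
    #     registry.registerOpaqueTable("datastore_records", spec)
    #     registry.insertOpaqueData("datastore_records", {"path": "a/b.fits", "checksum": "abc123"})
    #     rows = list(registry.fetchOpaqueData("datastore_records", path="a/b.fits"))
    #     registry.deleteOpaqueData("datastore_records", path="a/b.fits")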

    def registerCollection(
        self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None
    ) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, registered = self._managers.collections.register(name, type, doc=doc)
        return registered

    def getCollectionType(self, name: str) -> CollectionType:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.find(name).type

    def _get_collection_record(self, name: str) -> CollectionRecord:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.find(name)

    def registerRun(self, name: str, doc: str | None = None) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc)
        return registered

    @transactional
    def removeCollection(self, name: str) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.collections.remove(name)

    def getCollectionChain(self, parent: str) -> tuple[str, ...]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(parent)
        if record.type is not CollectionType.CHAINED:
            raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
        assert isinstance(record, ChainedCollectionRecord)
        return record.children

    @transactional
    def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(parent)
        if record.type is not CollectionType.CHAINED:
            raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
        assert isinstance(record, ChainedCollectionRecord)
        children = CollectionWildcard.from_expression(children).require_ordered()
        if children != record.children or flatten:
            record.update(self._managers.collections, children, flatten=flatten)
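    # Usage sketch for the collection methods (illustrative; collection names
    # are hypothetical): building a CHAINED collection that searches two RUN
    # collections in order.
    #
    #     registry.registerRun("HSC/runs/a")
    #     registry.registerRun("HSC/runs/b")
    #     registry.registerCollection("HSC/chained", CollectionType.CHAINED)
    #     registry.setCollectionChain("HSC/chained", ["HSC/runs/a", "HSC/runs/b"])
    #     assert registry.getCollectionChain("HSC/chained") == ("HSC/runs/a", "HSC/runs/b")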

    def getCollectionParentChains(self, collection: str) -> set[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return {
            record.name
            for record in self._managers.collections.getParentChains(
                self._managers.collections.find(collection).key
            )
        }

    def getCollectionDocumentation(self, collection: str) -> str | None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key)

    def setCollectionDocumentation(self, collection: str, doc: str | None) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc)

    def getCollectionSummary(self, collection: str) -> CollectionSummary:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(collection)
        return self._managers.datasets.getCollectionSummary(record)

    def registerDatasetType(self, datasetType: DatasetType) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, inserted = self._managers.datasets.register(datasetType)
        return inserted

    def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry

        for datasetTypeExpression in ensure_iterable(name):
            datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression))
            if not datasetTypes:
                _LOG.info("Dataset type %r not defined", datasetTypeExpression)
            else:
                for datasetType in datasetTypes:
                    self._managers.datasets.remove(datasetType.name)
                    _LOG.info("Removed dataset type %r", datasetType.name)

    def getDatasetType(self, name: str) -> DatasetType:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        parent_name, component = DatasetType.splitDatasetTypeName(name)
        storage = self._managers.datasets[parent_name]
        if component is None:
            return storage.datasetType
        else:
            return storage.datasetType.makeComponentDatasetType(component)

    def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datasets.supportsIdGenerationMode(mode)

    def findDataset(
        self,
        datasetType: DatasetType | str,
        dataId: DataId | None = None,
        *,
        collections: CollectionArgType | None = None,
        timespan: Timespan | None = None,
        **kwargs: Any,
    ) -> DatasetRef | None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if collections is None:
            if not self.defaults.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to findDataset, and no defaults from registry construction."
                )
            collections = self.defaults.collections
        backend = queries.SqlQueryBackend(self._db, self._managers)
        collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True)
        if collection_wildcard.empty():
            return None
        matched_collections = backend.resolve_collection_wildcard(collection_wildcard)
        parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard(
            datasetType, components_deprecated=False
        )
        if len(components) > 1:
            raise DatasetTypeError(
                f"findDataset requires exactly one dataset type; got multiple components {components} "
                f"for parent dataset type {parent_dataset_type.name}."
            )
        component = components[0]
        dataId = DataCoordinate.standardize(
            dataId,
            graph=parent_dataset_type.dimensions,
            universe=self.dimensions,
            defaults=self.defaults.dataId,
            **kwargs,
        )
        governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names}
        (filtered_collections,) = backend.filter_dataset_collections(
            [parent_dataset_type],
            matched_collections,
            governor_constraints=governor_constraints,
        ).values()
        if not filtered_collections:
            return None
        if timespan is None:
            filtered_collections = [
                collection_record
                for collection_record in filtered_collections
                if collection_record.type is not CollectionType.CALIBRATION
            ]
        if filtered_collections:
            requested_columns = {"dataset_id", "run", "collection"}
            with backend.context() as context:
                predicate = context.make_data_coordinate_predicate(
                    dataId.subset(parent_dataset_type.dimensions), full=False
                )
                if timespan is not None:
                    requested_columns.add("timespan")
                    predicate = predicate.logical_and(
                        context.make_timespan_overlap_predicate(
                            DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan
                        )
                    )
                relation = backend.make_dataset_query_relation(
                    parent_dataset_type, filtered_collections, requested_columns, context
                ).with_rows_satisfying(predicate)
                rows = list(context.fetch_iterable(relation))
        else:
            rows = []
        if not rows:
            return None
        elif len(rows) == 1:
            best_row = rows[0]
        else:
            rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)}
            collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
            row_iter = iter(rows)
            best_row = next(row_iter)
            best_rank = rank_by_collection_key[best_row[collection_tag]]
            have_tie = False
            for row in row_iter:
                if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank:
                    best_row = row
                    best_rank = rank
                    have_tie = False
                elif rank == best_rank:
                    have_tie = True
                    assert timespan is not None, "Rank ties should be impossible given DB constraints."
            if have_tie:
                raise LookupError(
                    f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections "
                    f"{collection_wildcard.strings} with timespan {timespan}."
                )
        reader = queries.DatasetRefReader(
            parent_dataset_type,
            translate_collection=lambda k: self._managers.collections[k].name,
        )
        ref = reader.read(best_row, data_id=dataId)
        if component is not None:
            ref = ref.makeComponentRef(component)
        return ref
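    # Usage sketch for `findDataset` (illustrative; the dataset types, data ID
    # values, and collection names are hypothetical, and ``calib_timespan`` is
    # assumed to be a pre-built `Timespan` covering the exposure of interest):
    #
    #     ref = registry.findDataset(
    #         "calexp", instrument="HSC", visit=903334, detector=20, collections=["HSC/runs/a"]
    #     )
    #     bias = registry.findDataset(
    #         "bias", instrument="HSC", detector=20, collections=["HSC/calib"], timespan=calib_timespan
    #     )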

    @transactional
    def insertDatasets(
        self,
        datasetType: DatasetType | str,
        dataIds: Iterable[DataId],
        run: str | None = None,
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> list[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if isinstance(datasetType, DatasetType):
            storage = self._managers.datasets.find(datasetType.name)
            if storage is None:
                raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
        else:
            storage = self._managers.datasets.find(datasetType)
            if storage is None:
                raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.")
        if run is None:
            if self.defaults.run is None:
                raise NoDefaultCollectionError(
                    "No run provided to insertDatasets, and no default from registry construction."
                )
            run = self.defaults.run
        runRecord = self._managers.collections.find(run)
        if runRecord.type is not CollectionType.RUN:
            raise CollectionTypeError(
                f"Given collection is of type {runRecord.type.name}; RUN collection required."
            )
        assert isinstance(runRecord, RunRecord)
        progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
        if expand:
            expandedDataIds = [
                self.expandDataId(dataId, graph=storage.datasetType.dimensions)
                for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs")
            ]
        else:
            expandedDataIds = [
                DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds
            ]
        try:
            refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode))
            if self._managers.obscore:
                context = queries.SqlQueryContext(self._db, self._managers.column_types)
                self._managers.obscore.add_datasets(refs, context)
        except sqlalchemy.exc.IntegrityError as err:
            raise ConflictingDefinitionError(
                "A database constraint failure was triggered by inserting "
                f"one or more datasets of type {storage.datasetType} into "
                f"collection '{run}'. "
                "This probably means a dataset with the same data ID "
                "and dataset type already exists, but it may also mean a "
                "dimension row is missing."
            ) from err
        return refs
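    # Usage sketch for `insertDatasets` (illustrative; the dataset type name,
    # dimensions, storage class, run name, and data ID values are
    # hypothetical): register a dataset type, then insert one dataset into a
    # RUN collection.
    #
    #     registry.registerDatasetType(
    #         DatasetType("rawMetadata", {"instrument", "exposure", "detector"},
    #                     "PropertyList", universe=registry.dimensions)
    #     )
    #     registry.registerRun("HSC/runs/ingest")
    #     (ref,) = registry.insertDatasets(
    #         "rawMetadata",
    #         [{"instrument": "HSC", "exposure": 1234, "detector": 20}],
    #         run="HSC/runs/ingest",
    #     )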

    @transactional
    def _importDatasets(
        self,
        datasets: Iterable[DatasetRef],
        expand: bool = True,
    ) -> list[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        datasets = list(datasets)
        if not datasets:
            # nothing to do
            return []

        # find dataset type
        datasetTypes = {dataset.datasetType for dataset in datasets}
        if len(datasetTypes) != 1:
            raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}")
        datasetType = datasetTypes.pop()

        # get storage handler for this dataset type
        storage = self._managers.datasets.find(datasetType.name)
        if storage is None:
            raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")

        # find run name
        runs = {dataset.run for dataset in datasets}
        if len(runs) != 1:
            raise ValueError(f"Multiple run names in input datasets: {runs}")
        run = runs.pop()

        runRecord = self._managers.collections.find(run)
        if runRecord.type is not CollectionType.RUN:
            raise CollectionTypeError(
                f"Given collection '{runRecord.name}' is of type {runRecord.type.name};"
                " RUN collection required."
            )
        assert isinstance(runRecord, RunRecord)

        progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
        if expand:
            expandedDatasets = [
                dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions))
                for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs")
            ]
        else:
            expandedDatasets = [
                DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True)
                for dataset in datasets
            ]

        try:
            refs = list(storage.import_(runRecord, expandedDatasets))
            if self._managers.obscore:
                context = queries.SqlQueryContext(self._db, self._managers.column_types)
                self._managers.obscore.add_datasets(refs, context)
        except sqlalchemy.exc.IntegrityError as err:
            raise ConflictingDefinitionError(
                "A database constraint failure was triggered by inserting "
                f"one or more datasets of type {storage.datasetType} into "
                f"collection '{run}'. "
                "This probably means a dataset with the same data ID "
                "and dataset type already exists, but it may also mean a "
                "dimension row is missing."
            ) from err
        # Check that imported dataset IDs match the input
        for imported_ref, input_ref in zip(refs, datasets):
            if imported_ref.id != input_ref.id:
                raise RegistryConsistencyError(
                    "Imported dataset ID differs from input dataset ID, "
                    f"input ref: {input_ref}, imported ref: {imported_ref}"
                )
        return refs

    def getDataset(self, id: DatasetId) -> DatasetRef | None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datasets.getDatasetRef(id)

    @transactional
    def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG)
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.groupByType(refs).items(), desc="Removing datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            try:
                storage.delete(refsForType)
            except sqlalchemy.exc.IntegrityError as err:
                raise OrphanedRecordError(
                    "One or more datasets is still present in one or more Datastores."
                ) from err

    @transactional
    def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        if collectionRecord.type is not CollectionType.TAGGED:
            raise CollectionTypeError(
                f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED."
            )
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.groupByType(refs).items(), desc="Associating datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            try:
                storage.associate(collectionRecord, refsForType)
                if self._managers.obscore:
                    # If a TAGGED collection is being monitored by ObsCore
                    # manager then we may need to save the dataset.
                    context = queries.SqlQueryContext(self._db, self._managers.column_types)
                    self._managers.obscore.associate(refsForType, collectionRecord, context)
            except sqlalchemy.exc.IntegrityError as err:
                raise ConflictingDefinitionError(
                    f"Constraint violation while associating dataset of type {datasetType.name} with "
                    f"collection {collection}. This probably means that one or more datasets with the same "
                    "dataset type and data ID already exist in the collection, but it may also indicate "
                    "that the datasets do not exist."
                ) from err

    @transactional
    def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        if collectionRecord.type is not CollectionType.TAGGED:
            raise CollectionTypeError(
                f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED."
            )
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            storage.disassociate(collectionRecord, refsForType)
            if self._managers.obscore:
                self._managers.obscore.disassociate(refsForType, collectionRecord)

    @transactional
    def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            storage.certify(
                collectionRecord,
                refsForType,
                timespan,
                context=queries.SqlQueryContext(self._db, self._managers.column_types),
            )

    @transactional
    def decertify(
        self,
        collection: str,
        datasetType: str | DatasetType,
        timespan: Timespan,
        *,
        dataIds: Iterable[DataId] | None = None,
    ) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        collectionRecord = self._managers.collections.find(collection)
        if isinstance(datasetType, str):
            storage = self._managers.datasets[datasetType]
        else:
            storage = self._managers.datasets[datasetType.name]
        standardizedDataIds = None
        if dataIds is not None:
            standardizedDataIds = [
                DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds
            ]
        storage.decertify(
            collectionRecord,
            timespan,
            dataIds=standardizedDataIds,
            context=queries.SqlQueryContext(self._db, self._managers.column_types),
        )
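    # Usage sketch for `certify`/`decertify` (illustrative; the collection and
    # dataset type names are hypothetical, ``bias_refs`` is an assumed iterable
    # of resolved `DatasetRef` objects, and ``t1``/``t2`` are astropy times):
    #
    #     registry.registerCollection("HSC/calib", CollectionType.CALIBRATION)
    #     registry.certify("HSC/calib", bias_refs, Timespan(begin=t1, end=t2))
    #     registry.decertify("HSC/calib", "bias", Timespan(begin=t1, end=t2))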

    def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
        """Return an object that allows a new `Datastore` instance to
        communicate with this `Registry`.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            Object that mediates communication between this `Registry` and its
            associated datastores.
        """
        return self._managers.datastores

    def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datastores.findDatastores(ref)

    def expandDataId(
        self,
        dataId: DataId | None = None,
        *,
        graph: DimensionGraph | None = None,
        records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None,
        withDefaults: bool = True,
        **kwargs: Any,
    ) -> DataCoordinate:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if not withDefaults:
            defaults = None
        else:
            defaults = self.defaults.dataId
        try:
            standardized = DataCoordinate.standardize(
                dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs
            )
        except KeyError as exc:
            # This means either kwargs have some odd name or a required
            # dimension is missing.
            raise DimensionNameError(str(exc)) from exc
        if standardized.hasRecords():
            return standardized
        if records is None:
            records = {}
        elif isinstance(records, NamedKeyMapping):
            records = records.byName()
        else:
            records = dict(records)
        if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
            records.update(dataId.records.byName())
        keys = standardized.byName()
        context = queries.SqlQueryContext(self._db, self._managers.column_types)
        for element in standardized.graph.primaryKeyTraversalOrder:
            record = records.get(element.name, ...)  # Use ... to mean not found; None might mean NULL
            if record is ...:
                if isinstance(element, Dimension) and keys.get(element.name) is None:
                    if element in standardized.graph.required:
                        raise DimensionNameError(
                            f"No value or null value for required dimension {element.name}."
                        )
                    keys[element.name] = None
                    record = None
                else:
                    storage = self._managers.dimensions[element]
                    record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context)
                records[element.name] = record
            if record is not None:
                for d in element.implied:
                    value = getattr(record, d.name)
                    if keys.setdefault(d.name, value) != value:
                        raise InconsistentDataIdError(
                            f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
                            f"but {element.name} implies {d.name}={value!r}."
                        )
            else:
                if element in standardized.graph.required:
                    raise DataIdValueError(
                        f"Could not fetch record for required dimension {element.name} via keys {keys}."
                    )
                if element.alwaysJoin:
                    raise InconsistentDataIdError(
                        f"Could not fetch record for element {element.name} via keys {keys}, "
                        "but it is marked alwaysJoin=True; this means one or more dimensions are not "
                        "related."
                    )
                for d in element.implied:
                    keys.setdefault(d.name, None)
                    records.setdefault(d.name, None)
        return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records)
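    # Usage sketch for `expandDataId` (illustrative; the instrument and
    # exposure values are hypothetical): expanding a minimal data ID so that
    # implied dimension values and dimension records become available.
    #
    #     data_id = registry.expandDataId(instrument="HSC", exposure=1234)
    #     physical_filter = data_id["physical_filter"]   # implied value filled in
    #     exposure_record = data_id.records["exposure"]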

    def insertDimensionData(
        self,
        element: DimensionElement | str,
        *data: Mapping[str, Any] | DimensionRecord,
        conform: bool = True,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if conform:
            if isinstance(element, str):
                element = self.dimensions[element]
            records = [
                row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data
            ]
        else:
            # Ignore typing since caller said to trust them with conform=False.
            records = data  # type: ignore
        storage = self._managers.dimensions[element]
        storage.insert(*records, replace=replace, skip_existing=skip_existing)

    def syncDimensionData(
        self,
        element: DimensionElement | str,
        row: Mapping[str, Any] | DimensionRecord,
        conform: bool = True,
        update: bool = False,
    ) -> bool | dict[str, Any]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if conform:
            if isinstance(element, str):
                element = self.dimensions[element]
            record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row)
        else:
            # Ignore typing since caller said to trust them with conform=False.
            record = row  # type: ignore
        storage = self._managers.dimensions[element]
        return storage.sync(record, update=update)
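    # Usage sketch for the dimension-data methods (illustrative; the record
    # field names and values are hypothetical and assume the default dimension
    # universe): idempotently sync an instrument record, then insert a
    # detector record, skipping it if it already exists.
    #
    #     registry.syncDimensionData(
    #         "instrument",
    #         {"name": "MyCam", "visit_max": 999999, "exposure_max": 999999,
    #          "detector_max": 99, "class_name": "lsst.obs.mycam.MyCam"},
    #     )
    #     registry.insertDimensionData(
    #         "detector", {"instrument": "MyCam", "id": 1, "full_name": "S1"}, skip_existing=True
    #     )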

    def queryDatasetTypes(
        self,
        expression: Any = ...,
        *,
        components: bool | None = None,
        missing: list[str] | None = None,
    ) -> Iterable[DatasetType]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        wildcard = DatasetTypeWildcard.from_expression(expression)
        composition_dict = self._managers.datasets.resolve_wildcard(
            wildcard,
            components=components,
            missing=missing,
        )
        result: list[DatasetType] = []
        for parent_dataset_type, components_for_parent in composition_dict.items():
            result.extend(
                parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type
                for c in components_for_parent
            )
        return result

    def queryCollections(
        self,
        expression: Any = ...,
        datasetType: DatasetType | None = None,
        collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(),
        flattenChains: bool = False,
        includeChains: bool | None = None,
    ) -> Sequence[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry

        # Right now the datasetType argument is completely ignored, but that
        # is consistent with its [lack of] guarantees. DM-24939 or a follow-up
        # ticket will take care of that.
        try:
            wildcard = CollectionWildcard.from_expression(expression)
        except TypeError as exc:
            raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc
        collectionTypes = ensure_iterable(collectionTypes)
        return [
            record.name
            for record in self._managers.collections.resolve_wildcard(
                wildcard,
                collection_types=frozenset(collectionTypes),
                flatten_chains=flattenChains,
                include_chains=includeChains,
            )
        ]
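    # Usage sketch for `queryCollections` (illustrative; assumes ``import re``
    # and a hypothetical naming convention): listing RUN collections whose
    # names match a regular expression, flattening any CHAINED collections.
    #
    #     runs = registry.queryCollections(
    #         re.compile(r"HSC/runs/.*"),
    #         collectionTypes=CollectionType.RUN,
    #         flattenChains=True,
    #     )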

    def _makeQueryBuilder(
        self,
        summary: queries.QuerySummary,
        doomed_by: Iterable[str] = (),
    ) -> queries.QueryBuilder:
        """Return a `QueryBuilder` instance capable of constructing and
        managing more complex queries than those obtainable via `Registry`
        interfaces.

        This is an advanced interface; downstream code should prefer
        `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
        are sufficient.

        Parameters
        ----------
        summary : `queries.QuerySummary`
            Object describing and categorizing the full set of dimensions that
            will be included in the query.
        doomed_by : `~collections.abc.Iterable` of `str`, optional
            A list of diagnostic messages that indicate why the query is going
            to yield no results and should not even be executed. If an empty
            container (default) the query will be executed unless other code
            determines that it is doomed.

        Returns
        -------
        builder : `queries.QueryBuilder`
            Object that can be used to construct and perform advanced queries.
        """
        doomed_by = list(doomed_by)
        backend = queries.SqlQueryBackend(self._db, self._managers)
        context = backend.context()
        relation: Relation | None = None
        if doomed_by:
            relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by)
        return queries.QueryBuilder(
            summary,
            backend=backend,
            context=context,
            relation=relation,
        )

    def _standardize_query_data_id_args(
        self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any
    ) -> DataCoordinate:
        """Preprocess the data ID arguments passed to query* methods.

        Parameters
        ----------
        data_id : `DataId` or `None`
            Data ID that constrains the query results.
        doomed_by : `list` [ `str` ]
            List to append messages indicating why the query is doomed to
            yield no results.
        **kwargs
            Additional data ID key-value pairs, extending and overriding
            ``data_id``.

        Returns
        -------
        data_id : `DataCoordinate`
            Standardized data ID. Will be fully expanded unless expansion
            fails, in which case a message will be appended to ``doomed_by``
            on return.
        """
        try:
            return self.expandDataId(data_id, **kwargs)
        except DataIdValueError as err:
            doomed_by.append(str(err))
            return DataCoordinate.standardize(
                data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId
            )

    def _standardize_query_dataset_args(
        self,
        datasets: Any,
        collections: CollectionArgType | None,
        components: bool | None,
        mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain",
        *,
        doomed_by: list[str],
    ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]:
        """Preprocess dataset arguments passed to query* methods.

        Parameters
        ----------
        datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these
            Expression identifying dataset types. See `queryDatasetTypes` for
            details.
        collections : `str`, `re.Pattern`, or iterable of these
            Expression identifying collections to be searched. See
            `queryCollections` for details.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components.
            If `None` (default), apply patterns to components only if their
            parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

            Values other than `False` are deprecated, and only `False` will be
            supported after v26. After v27 this argument will be removed
            entirely.
        mode : `str`, optional
            The way in which datasets are being used in this query; one of:

            - "find_first": this is a query for the first dataset in an
              ordered list of collections. Prohibits collection wildcards,
              but permits dataset type wildcards.

            - "find_all": this is a query for all datasets in all matched
              collections. Permits collection and dataset type wildcards.

            - "constrain": this is a query for something other than datasets,
              with results constrained by dataset existence. Permits
              collection wildcards and prohibits ``...`` as a dataset type
              wildcard.
        doomed_by : `list` [ `str` ]
            List to append messages indicating why the query is doomed to
            yield no results.

        Returns
        -------
        composition : `dict` [ `DatasetType`, `list` [ `str` | `None` ] ]
            Dictionary mapping parent dataset type to `list` of components
            matched for that dataset type (or `None` for the parent itself).
        collections : `CollectionWildcard` or `None`
            Processed collection expression.
        """
        composition: dict[DatasetType, list[str | None]] = {}
        collection_wildcard: CollectionWildcard | None = None
        if datasets is not None:
            if collections is None:
                if not self.defaults.collections:
                    raise NoDefaultCollectionError("No collections, and no registry default collections.")
                collection_wildcard = CollectionWildcard.from_expression(self.defaults.collections)
            else:
                collection_wildcard = CollectionWildcard.from_expression(collections)
                if mode == "find_first" and collection_wildcard.patterns:
                    raise TypeError(
                        f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context."
                    )
            missing: list[str] = []
            composition = self._managers.datasets.resolve_wildcard(
                datasets, components=components, missing=missing, explicit_only=(mode == "constrain")
            )
            if missing and mode == "constrain":
                # After v26 this should raise MissingDatasetTypeError, to be
                # implemented on DM-36303.
                warnings.warn(
                    f"Dataset type(s) {missing} are not registered; this will be an error after v26.",
                    FutureWarning,
                )
            doomed_by.extend(f"Dataset type {name} is not registered." for name in missing)
        elif collections:
            # I think this check should actually be `collections is not None`,
            # but it looks like some CLI scripts use empty tuple as default.
            raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.")
        return composition, collection_wildcard

    def queryDatasets(
        self,
        datasetType: Any,
        *,
        collections: CollectionArgType | None = None,
        dimensions: Iterable[Dimension | str] | None = None,
        dataId: DataId | None = None,
        where: str = "",
        findFirst: bool = False,
        components: bool | None = None,
        bind: Mapping[str, Any] | None = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DatasetQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        doomed_by: list[str] = []
        data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
        dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
            datasetType,
            collections,
            components,
            mode="find_first" if findFirst else "find_all",
            doomed_by=doomed_by,
        )
        if collection_wildcard is not None and collection_wildcard.empty():
            doomed_by.append("No datasets can be found because collection list is empty.")
            return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
        parent_results: list[queries.ParentDatasetQueryResults] = []
        for parent_dataset_type, components_for_parent in dataset_composition.items():
            # The full set of dimensions in the query is the combination of
            # those needed for the DatasetType and those explicitly requested,
            # if any.
            dimension_names = set(parent_dataset_type.dimensions.names)
            if dimensions is not None:
                dimension_names.update(self.dimensions.extract(dimensions).names)
            # Construct the summary structure needed to construct a
            # QueryBuilder.
            summary = queries.QuerySummary(
                requested=DimensionGraph(self.dimensions, names=dimension_names),
                column_types=self._managers.column_types,
                data_id=data_id,
                expression=where,
                bind=bind,
                defaults=self.defaults.dataId,
                check=check,
                datasets=[parent_dataset_type],
            )
            builder = self._makeQueryBuilder(summary)
            # Add the dataset subquery to the query, telling the QueryBuilder
            # to include the rank of the selected collection in the results
            # only if we need to findFirst. Note that if any of the
            # collections are actually wildcard expressions, and
            # findFirst=True, this will raise TypeError for us.
            builder.joinDataset(parent_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst)
            query = builder.finish()
            parent_results.append(
                queries.ParentDatasetQueryResults(
                    query, parent_dataset_type, components=components_for_parent
                )
            )
        if not parent_results:
            doomed_by.extend(
                f"No registered dataset type matching {t!r} found, so no matching datasets can "
                "exist in any collection."
                for t in ensure_iterable(datasetType)
            )
            return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
        elif len(parent_results) == 1:
            return parent_results[0]
        else:
            return queries.ChainedDatasetQueryResults(parent_results)
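    # Usage sketch for `queryDatasets` (illustrative; the dataset type,
    # collections, expression, and bind value are hypothetical): a find-first
    # query constrained by a user expression with a bind parameter.
    #
    #     refs = registry.queryDatasets(
    #         "calexp",
    #         collections=["HSC/runs/a", "HSC/runs/b"],
    #         where="instrument = 'HSC' AND visit > cutoff",
    #         bind={"cutoff": 903000},
    #         findFirst=True,
    #     )
    #     for ref in refs:
    #         print(ref.dataId, ref.run)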

    def queryDataIds(
        self,
        dimensions: Iterable[Dimension | str] | Dimension | str,
        *,
        dataId: DataId | None = None,
        datasets: Any = None,
        collections: CollectionArgType | None = None,
        where: str = "",
        components: bool | None = None,
        bind: Mapping[str, Any] | None = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DataCoordinateQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        dimensions = ensure_iterable(dimensions)
        requestedDimensions = self.dimensions.extract(dimensions)
        doomed_by: list[str] = []
        data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
        dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
            datasets, collections, components, doomed_by=doomed_by
        )
        if collection_wildcard is not None and collection_wildcard.empty():
            doomed_by.append("No data coordinates can be found because collection list is empty.")
        summary = queries.QuerySummary(
            requested=requestedDimensions,
            column_types=self._managers.column_types,
            data_id=data_id,
            expression=where,
            bind=bind,
            defaults=self.defaults.dataId,
            check=check,
            datasets=dataset_composition.keys(),
        )
        builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
        for datasetType in dataset_composition.keys():
            builder.joinDataset(datasetType, collection_wildcard, isResult=False)
        query = builder.finish()

        return queries.DataCoordinateQueryResults(query)
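    # Usage sketch for `queryDataIds` (illustrative; dataset type, collection,
    # and instrument names are hypothetical): querying data IDs for a set of
    # dimensions, constrained by dataset existence in a collection.
    #
    #     data_ids = registry.queryDataIds(
    #         ["visit", "detector"],
    #         datasets="calexp",
    #         collections=["HSC/runs/a"],
    #         instrument="HSC",
    #     )
    #     for data_id in data_ids.expanded():
    #         print(data_id["visit"], data_id["detector"])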

    def queryDimensionRecords(
        self,
        element: DimensionElement | str,
        *,
        dataId: DataId | None = None,
        datasets: Any = None,
        collections: CollectionArgType | None = None,
        where: str = "",
        components: bool | None = None,
        bind: Mapping[str, Any] | None = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DimensionRecordQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if not isinstance(element, DimensionElement):
            try:
                element = self.dimensions[element]
            except KeyError as e:
                raise DimensionNameError(
                    f"No such dimension '{element}', available dimensions: "
                    + str(self.dimensions.getStaticElements())
                ) from e
        doomed_by: list[str] = []
        data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
        dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
            datasets, collections, components, doomed_by=doomed_by
        )
        if collection_wildcard is not None and collection_wildcard.empty():
            doomed_by.append("No dimension records can be found because collection list is empty.")
        summary = queries.QuerySummary(
            requested=element.graph,
            column_types=self._managers.column_types,
            data_id=data_id,
            expression=where,
            bind=bind,
            defaults=self.defaults.dataId,
            check=check,
            datasets=dataset_composition.keys(),
        )
        builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
        for datasetType in dataset_composition.keys():
            builder.joinDataset(datasetType, collection_wildcard, isResult=False)
        query = builder.finish().with_record_columns(element)
        return queries.DatabaseDimensionRecordQueryResults(query, element)

    def queryDatasetAssociations(
        self,
        datasetType: str | DatasetType,
        collections: CollectionArgType | None = ...,
        *,
        collectionTypes: Iterable[CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
    ) -> Iterator[DatasetAssociation]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if collections is None:
            if not self.defaults.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to queryDatasetAssociations, "
                    "and no defaults from registry construction."
                )
            collections = self.defaults.collections
        collection_wildcard = CollectionWildcard.from_expression(collections)
        backend = queries.SqlQueryBackend(self._db, self._managers)
        parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False)
        timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan")
        collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
        for parent_collection_record in backend.resolve_collection_wildcard(
            collection_wildcard,
            collection_types=frozenset(collectionTypes),
            flatten_chains=flattenChains,
        ):
            # Resolve this possibly-chained collection into a list of
            # non-CHAINED collections that actually hold datasets of this
            # type.
            candidate_collection_records = backend.resolve_dataset_collections(
                parent_dataset_type,
                CollectionWildcard.from_names([parent_collection_record.name]),
                allow_calibration_collections=True,
                governor_constraints={},
            )
            if not candidate_collection_records:
                continue
            with backend.context() as context:
                relation = backend.make_dataset_query_relation(
                    parent_dataset_type,
                    candidate_collection_records,
                    columns={"dataset_id", "run", "timespan", "collection"},
                    context=context,
                )
                reader = queries.DatasetRefReader(
                    parent_dataset_type,
                    translate_collection=lambda k: self._managers.collections[k].name,
                    full=False,
                )
                for row in context.fetch_iterable(relation):
                    ref = reader.read(row)
                    collection_record = self._managers.collections[row[collection_tag]]
                    if collection_record.type is CollectionType.CALIBRATION:
                        timespan = row[timespan_tag]
                    else:
                        # For backwards compatibility and (possibly?) user
                        # convenience we continue to define the timespan of a
                        # DatasetAssociation row for a non-CALIBRATION
                        # collection to be None rather than a fully unbounded
                        # timespan.
                        timespan = None
                    yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan)

    @property
    def obsCoreTableManager(self) -> ObsCoreTableManager | None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.obscore

    storageClasses: StorageClassFactory
    """All storage classes known to the registry (`StorageClassFactory`).
    """