Coverage for python/lsst/daf/butler/registries/sql.py: 12%

503 statements  

coverage.py v6.5.0, created at 2023-03-04 02:04 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("SqlRegistry",) 

25 

26import contextlib 

27import logging 

28import warnings 

29from typing import ( 

30 TYPE_CHECKING, 

31 Any, 

32 Dict, 

33 Iterable, 

34 Iterator, 

35 List, 

36 Literal, 

37 Mapping, 

38 Optional, 

39 Sequence, 

40 Set, 

41 Union, 

42 cast, 

43) 

44 

45import sqlalchemy 

46from lsst.daf.relation import LeafRelation, Relation 

47from lsst.resources import ResourcePathExpression 

48from lsst.utils.iteration import ensure_iterable 

49 

50from ..core import ( 

51 Config, 

52 DataCoordinate, 

53 DataId, 

54 DatasetAssociation, 

55 DatasetColumnTag, 

56 DatasetId, 

57 DatasetRef, 

58 DatasetType, 

59 Dimension, 

60 DimensionConfig, 

61 DimensionElement, 

62 DimensionGraph, 

63 DimensionRecord, 

64 DimensionUniverse, 

65 NamedKeyMapping, 

66 NameLookupMapping, 

67 Progress, 

68 StorageClassFactory, 

69 Timespan, 

70 ddl, 

71) 

72from ..core.utils import transactional 

73from ..registry import ( 

74 ArgumentError, 

75 CollectionExpressionError, 

76 CollectionSummary, 

77 CollectionType, 

78 CollectionTypeError, 

79 ConflictingDefinitionError, 

80 DataIdValueError, 

81 DatasetTypeError, 

82 DimensionNameError, 

83 InconsistentDataIdError, 

84 NoDefaultCollectionError, 

85 OrphanedRecordError, 

86 Registry, 

87 RegistryConfig, 

88 RegistryDefaults, 

89 queries, 

90) 

91from ..registry.interfaces import ChainedCollectionRecord, DatasetIdFactory, DatasetIdGenEnum, RunRecord 

92from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes 

93from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard 

94 

95if TYPE_CHECKING:

96 from .._butlerConfig import ButlerConfig 

97 from ..registry.interfaces import CollectionRecord, Database, DatastoreRegistryBridgeManager 

98 

99 

100_LOG = logging.getLogger(__name__) 

101 

102 

103class SqlRegistry(Registry): 

104 """Registry implementation based on SQLAlchemy. 

105 

106 Parameters 

107 ---------- 

108 database : `Database` 

109 Database instance to store Registry. 

110 defaults : `RegistryDefaults` 

111 Default collection search path and/or output `~CollectionType.RUN` 

112 collection. 

113 managers : `RegistryManagerInstances` 

114 All the managers required for this registry. 

115 """ 

116 

117 defaultConfigFile: Optional[str] = None 

118 """Path to configuration defaults. Accessed within the ``configs`` resource 

119 or relative to a search path. Can be `None` if no defaults are specified.

120 """ 

121 

122 @classmethod 

123 def createFromConfig( 

124 cls, 

125 config: Optional[Union[RegistryConfig, str]] = None, 

126 dimensionConfig: Optional[Union[DimensionConfig, str]] = None, 

127 butlerRoot: Optional[ResourcePathExpression] = None, 

128 ) -> Registry: 

129 """Create registry database and return `SqlRegistry` instance. 

130 

131 This method initializes the database contents; the database must be

132 empty prior to calling this method.

133 

134 Parameters 

135 ---------- 

136 config : `RegistryConfig` or `str`, optional 

137 Registry configuration; if missing, the default configuration will

138 be loaded from registry.yaml.

139 dimensionConfig : `DimensionConfig` or `str`, optional 

140 Dimensions configuration; if missing, the default configuration

141 will be loaded from dimensions.yaml.

142 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

143 Path to the repository root this `SqlRegistry` will manage. 

144 

145 Returns 

146 ------- 

147 registry : `SqlRegistry` 

148 A new `SqlRegistry` instance. 

149 """ 

150 config = cls.forceRegistryConfig(config) 

151 config.replaceRoot(butlerRoot) 

152 

153 if isinstance(dimensionConfig, str): 

154 dimensionConfig = DimensionConfig(dimensionConfig) 

155 elif dimensionConfig is None: 

156 dimensionConfig = DimensionConfig() 

157 elif not isinstance(dimensionConfig, DimensionConfig): 

158 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

159 

160 DatabaseClass = config.getDatabaseClass() 

161 database = DatabaseClass.fromUri( 

162 str(config.connectionString), origin=config.get("origin", 0), namespace=config.get("namespace") 

163 ) 

164 managerTypes = RegistryManagerTypes.fromConfig(config) 

165 managers = managerTypes.makeRepo(database, dimensionConfig) 

166 return cls(database, RegistryDefaults(), managers) 

167 
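# Illustrative usage (not part of the original module): creating a brand-new
# registry database from configuration. The file names and repository root
# below are assumptions, not values taken from this package.
#
#     from lsst.daf.butler.registries.sql import SqlRegistry
#
#     registry = SqlRegistry.createFromConfig(
#         config="registry.yaml",             # hypothetical registry config
#         dimensionConfig="dimensions.yaml",   # hypothetical dimension config
#         butlerRoot="/path/to/repo",          # hypothetical repository root
#     )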

168 @classmethod 

169 def fromConfig( 

170 cls, 

171 config: Union[ButlerConfig, RegistryConfig, Config, str], 

172 butlerRoot: Optional[ResourcePathExpression] = None, 

173 writeable: bool = True, 

174 defaults: Optional[RegistryDefaults] = None, 

175 ) -> Registry: 

176 """Create `Registry` subclass instance from `config`. 

177 

178 Registry database must be initialized prior to calling this method. 

179 

180 Parameters 

181 ---------- 

182 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

183 Registry configuration.

184 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

185 Path to the repository root this `Registry` will manage. 

186 writeable : `bool`, optional 

187 If `True` (default) create a read-write connection to the database. 

188 defaults : `RegistryDefaults`, optional 

189 Default collection search path and/or output `~CollectionType.RUN` 

190 collection. 

191 

192 Returns 

193 ------- 

194 registry : `SqlRegistry` (subclass) 

195 A new `SqlRegistry` subclass instance. 

196 """ 

197 config = cls.forceRegistryConfig(config) 

198 config.replaceRoot(butlerRoot) 

199 DatabaseClass = config.getDatabaseClass() 

200 database = DatabaseClass.fromUri( 

201 config.connectionString.render_as_string(hide_password=False), 

202 origin=config.get("origin", 0), 

203 namespace=config.get("namespace"), 

204 writeable=writeable, 

205 ) 

206 managerTypes = RegistryManagerTypes.fromConfig(config) 

207 with database.session(): 

208 managers = managerTypes.loadRepo(database) 

209 if defaults is None: 

210 defaults = RegistryDefaults() 

211 return cls(database, defaults, managers) 

212 
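# Illustrative usage (not part of the original module): connecting read-only
# to an existing, already-initialized registry database. The config path is
# an assumption.
#
#     registry = SqlRegistry.fromConfig("registry.yaml", writeable=False)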

213 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances): 

214 self._db = database 

215 self._managers = managers 

216 self.storageClasses = StorageClassFactory() 

217 # Intentionally invoke property setter to initialize defaults. This 

218 # can only be done after most of the rest of Registry has already been 

219 # initialized, and must be done before the property getter is used. 

220 self.defaults = defaults 

221 # In the future DatasetIdFactory may become configurable and this 

222 # instance will need to be shared with datasets manager. 

223 self.datasetIdFactory = DatasetIdFactory() 

224 

225 def __str__(self) -> str: 

226 return str(self._db) 

227 

228 def __repr__(self) -> str: 

229 return f"SqlRegistry({self._db!r}, {self.dimensions!r})" 

230 

231 def isWriteable(self) -> bool: 

232 # Docstring inherited from lsst.daf.butler.registry.Registry 

233 return self._db.isWriteable() 

234 

235 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry: 

236 # Docstring inherited from lsst.daf.butler.registry.Registry 

237 if defaults is None: 

238 # No need to copy, because `RegistryDefaults` is immutable; we 

239 # effectively copy on write. 

240 defaults = self.defaults 

241 return type(self)(self._db, defaults, self._managers) 

242 

243 @property 

244 def dimensions(self) -> DimensionUniverse: 

245 # Docstring inherited from lsst.daf.butler.registry.Registry 

246 return self._managers.dimensions.universe 

247 

248 def refresh(self) -> None: 

249 # Docstring inherited from lsst.daf.butler.registry.Registry 

250 with self._db.transaction(): 

251 self._managers.refresh() 

252 

253 @contextlib.contextmanager 

254 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

255 # Docstring inherited from lsst.daf.butler.registry.Registry 

256 try: 

257 with self._db.transaction(savepoint=savepoint): 

258 yield 

259 except BaseException: 

260 # TODO: this clears the caches sometimes when we wouldn't actually 

261 # need to. Can we avoid that? 

262 self._managers.dimensions.clearCaches() 

263 raise 

264 
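# Illustrative usage (not part of the original module): grouping registry
# operations so they commit or roll back together. The run name and dataset
# type object are hypothetical.
#
#     with registry.transaction(savepoint=True):
#         registry.registerRun("u/someone/test-run")
#         registry.registerDatasetType(someDatasetType)
#     # An exception raised inside the block rolls back both operations and
#     # clears the dimension record caches (see the except clause above).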

265 def resetConnectionPool(self) -> None: 

266 """Reset SQLAlchemy connection pool for `SqlRegistry` database. 

267 

268 This operation is useful when using the registry with fork-based

269 multiprocessing. To use the registry across a fork boundary, one has to

270 make sure that there are no currently active connections (no session or

271 transaction is in progress) and that the connection pool is reset using

272 this method. The child process should call this method immediately

273 after the fork.

274 """ 

275 self._db._engine.dispose() 

276 
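# Illustrative usage (not part of the original module): resetting the
# connection pool in a child process created by a fork, as the docstring
# above recommends.
#
#     import os
#
#     if os.fork() == 0:
#         # Child process: discard inherited connections before first use.
#         registry.resetConnectionPool()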

277 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

278 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

279 other data repository client. 

280 

281 Opaque table records can be added via `insertOpaqueData`, retrieved via 

282 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

283 

284 Parameters 

285 ---------- 

286 tableName : `str` 

287 Logical name of the opaque table. This may differ from the 

288 actual name used in the database by a prefix and/or suffix. 

289 spec : `ddl.TableSpec` 

290 Specification for the table to be added. 

291 """ 

292 self._managers.opaque.register(tableName, spec) 

293 
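# Illustrative usage (not part of the original module): registering an opaque
# table and round-tripping a record through it. The table name, column
# definitions, and ddl.TableSpec/FieldSpec arguments shown are assumptions.
#
#     spec = ddl.TableSpec(
#         fields=[
#             ddl.FieldSpec("dataset_id", dtype=sqlalchemy.String, length=64, primaryKey=True),
#             ddl.FieldSpec("path", dtype=sqlalchemy.String, length=256),
#         ]
#     )
#     registry.registerOpaqueTable("datastore_records", spec)
#     registry.insertOpaqueData("datastore_records", {"dataset_id": "abc", "path": "a/b.fits"})
#     rows = list(registry.fetchOpaqueData("datastore_records", dataset_id="abc"))
#     registry.deleteOpaqueData("datastore_records", dataset_id="abc")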

294 @transactional 

295 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

296 """Insert records into an opaque table. 

297 

298 Parameters 

299 ---------- 

300 tableName : `str` 

301 Logical name of the opaque table. Must match the name used in a 

302 previous call to `registerOpaqueTable`. 

303 data 

304 Each additional positional argument is a dictionary that represents 

305 a single row to be added. 

306 """ 

307 self._managers.opaque[tableName].insert(*data) 

308 

309 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]: 

310 """Retrieve records from an opaque table. 

311 

312 Parameters 

313 ---------- 

314 tableName : `str` 

315 Logical name of the opaque table. Must match the name used in a 

316 previous call to `registerOpaqueTable`. 

317 where 

318 Additional keyword arguments are interpreted as equality 

319 constraints that restrict the returned rows (combined with AND); 

320 keyword arguments are column names and values are the values they 

321 must have. 

322 

323 Yields 

324 ------ 

325 row : `dict` 

326 A dictionary representing a single result row. 

327 """ 

328 yield from self._managers.opaque[tableName].fetch(**where) 

329 

330 @transactional 

331 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

332 """Remove records from an opaque table. 

333 

334 Parameters 

335 ---------- 

336 tableName : `str` 

337 Logical name of the opaque table. Must match the name used in a 

338 previous call to `registerOpaqueTable`. 

339 where 

340 Additional keyword arguments are interpreted as equality 

341 constraints that restrict the deleted rows (combined with AND); 

342 keyword arguments are column names and values are the values they 

343 must have. 

344 """ 

345 self._managers.opaque[tableName].delete(where.keys(), where) 

346 

347 def registerCollection( 

348 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None 

349 ) -> bool: 

350 # Docstring inherited from lsst.daf.butler.registry.Registry 

351 _, registered = self._managers.collections.register(name, type, doc=doc) 

352 return registered 

353 

354 def getCollectionType(self, name: str) -> CollectionType: 

355 # Docstring inherited from lsst.daf.butler.registry.Registry 

356 return self._managers.collections.find(name).type 

357 

358 def _get_collection_record(self, name: str) -> CollectionRecord: 

359 # Docstring inherited from lsst.daf.butler.registry.Registry 

360 return self._managers.collections.find(name) 

361 

362 def registerRun(self, name: str, doc: Optional[str] = None) -> bool: 

363 # Docstring inherited from lsst.daf.butler.registry.Registry 

364 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

365 return registered 

366 

367 @transactional 

368 def removeCollection(self, name: str) -> None: 

369 # Docstring inherited from lsst.daf.butler.registry.Registry 

370 self._managers.collections.remove(name) 

371 

372 def getCollectionChain(self, parent: str) -> tuple[str, ...]: 

373 # Docstring inherited from lsst.daf.butler.registry.Registry 

374 record = self._managers.collections.find(parent) 

375 if record.type is not CollectionType.CHAINED: 

376 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

377 assert isinstance(record, ChainedCollectionRecord) 

378 return record.children 

379 

380 @transactional 

381 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

382 # Docstring inherited from lsst.daf.butler.registry.Registry 

383 record = self._managers.collections.find(parent) 

384 if record.type is not CollectionType.CHAINED: 

385 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

386 assert isinstance(record, ChainedCollectionRecord) 

387 children = CollectionWildcard.from_expression(children).require_ordered() 

388 if children != record.children or flatten: 

389 record.update(self._managers.collections, children, flatten=flatten) 

390 

391 def getCollectionParentChains(self, collection: str) -> Set[str]: 

392 # Docstring inherited from lsst.daf.butler.registry.Registry 

393 return { 

394 record.name 

395 for record in self._managers.collections.getParentChains( 

396 self._managers.collections.find(collection).key 

397 ) 

398 } 

399 

400 def getCollectionDocumentation(self, collection: str) -> Optional[str]: 

401 # Docstring inherited from lsst.daf.butler.registry.Registry 

402 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

403 

404 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None: 

405 # Docstring inherited from lsst.daf.butler.registry.Registry 

406 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

407 

408 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

409 # Docstring inherited from lsst.daf.butler.registry.Registry 

410 record = self._managers.collections.find(collection) 

411 return self._managers.datasets.getCollectionSummary(record) 

412 

413 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

414 # Docstring inherited from lsst.daf.butler.registry.Registry 

415 _, inserted = self._managers.datasets.register(datasetType) 

416 return inserted 

417 

418 def removeDatasetType(self, name: str | tuple[str, ...]) -> None: 

419 # Docstring inherited from lsst.daf.butler.registry.Registry 

420 

421 for datasetTypeExpression in ensure_iterable(name): 

422 datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression)) 

423 if not datasetTypes: 

424 _LOG.info("Dataset type %r not defined", datasetTypeExpression) 

425 else: 

426 for datasetType in datasetTypes: 

427 self._managers.datasets.remove(datasetType.name) 

428 _LOG.info("Removed dataset type %r", datasetType.name) 

429 

430 def getDatasetType(self, name: str) -> DatasetType: 

431 # Docstring inherited from lsst.daf.butler.registry.Registry 

432 parent_name, component = DatasetType.splitDatasetTypeName(name) 

433 storage = self._managers.datasets[parent_name] 

434 if component is None: 

435 return storage.datasetType 

436 else: 

437 return storage.datasetType.makeComponentDatasetType(component) 

438 

439 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

440 # Docstring inherited from lsst.daf.butler.registry.Registry 

441 return self._managers.datasets.supportsIdGenerationMode(mode) 

442 

443 def findDataset( 

444 self, 

445 datasetType: Union[DatasetType, str], 

446 dataId: Optional[DataId] = None, 

447 *, 

448 collections: Any = None, 

449 timespan: Optional[Timespan] = None, 

450 **kwargs: Any, 

451 ) -> Optional[DatasetRef]: 

452 # Docstring inherited from lsst.daf.butler.registry.Registry 

453 if collections is None: 

454 if not self.defaults.collections: 

455 raise NoDefaultCollectionError( 

456 "No collections provided to findDataset, and no defaults from registry construction." 

457 ) 

458 collections = self.defaults.collections 

459 backend = queries.SqlQueryBackend(self._db, self._managers) 

460 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True) 

461 matched_collections = backend.resolve_collection_wildcard(collection_wildcard) 

462 parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard( 

463 datasetType, components_deprecated=False 

464 ) 

465 if len(components) > 1: 

466 raise DatasetTypeError( 

467 f"findDataset requires exactly one dataset type; got multiple components {components} " 

468 f"for parent dataset type {parent_dataset_type.name}." 

469 ) 

470 component = components[0] 

471 dataId = DataCoordinate.standardize( 

472 dataId, 

473 graph=parent_dataset_type.dimensions, 

474 universe=self.dimensions, 

475 defaults=self.defaults.dataId, 

476 **kwargs, 

477 ) 

478 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names} 

479 (filtered_collections,) = backend.filter_dataset_collections( 

480 [parent_dataset_type], 

481 matched_collections, 

482 governor_constraints=governor_constraints, 

483 ).values() 

484 if not filtered_collections: 

485 return None 

486 if timespan is None: 

487 filtered_collections = [ 

488 collection_record 

489 for collection_record in filtered_collections 

490 if collection_record.type is not CollectionType.CALIBRATION 

491 ] 

492 if filtered_collections: 

493 requested_columns = {"dataset_id", "run", "collection"} 

494 with backend.context() as context: 

495 predicate = context.make_data_coordinate_predicate( 

496 dataId.subset(parent_dataset_type.dimensions), full=False 

497 ) 

498 if timespan is not None: 

499 requested_columns.add("timespan") 

500 predicate = predicate.logical_and( 

501 context.make_timespan_overlap_predicate( 

502 DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan 

503 ) 

504 ) 

505 relation = backend.make_dataset_query_relation( 

506 parent_dataset_type, filtered_collections, requested_columns, context 

507 ).with_rows_satisfying(predicate) 

508 rows = list(context.fetch_iterable(relation)) 

509 else: 

510 rows = [] 

511 if not rows: 

512 return None 

513 elif len(rows) == 1: 

514 best_row = rows[0] 

515 else: 

516 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)} 

517 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

518 row_iter = iter(rows) 

519 best_row = next(row_iter) 

520 best_rank = rank_by_collection_key[best_row[collection_tag]] 

521 have_tie = False 

522 for row in row_iter: 

523 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank: 

524 best_row = row 

525 best_rank = rank 

526 have_tie = False 

527 elif rank == best_rank: 

528 have_tie = True 

529 assert timespan is not None, "Rank ties should be impossible given DB constraints." 

530 if have_tie: 

531 raise LookupError( 

532 f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections " 

533 f"{collection_wildcard.strings} with timespan {timespan}." 

534 ) 

535 reader = queries.DatasetRefReader( 

536 parent_dataset_type, 

537 translate_collection=lambda k: self._managers.collections[k].name, 

538 ) 

539 ref = reader.read(best_row, data_id=dataId) 

540 if component is not None: 

541 ref = ref.makeComponentRef(component) 

542 return ref 

543 

544 @transactional 

545 def insertDatasets( 

546 self, 

547 datasetType: Union[DatasetType, str], 

548 dataIds: Iterable[DataId], 

549 run: Optional[str] = None, 

550 expand: bool = True, 

551 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

552 ) -> List[DatasetRef]: 

553 # Docstring inherited from lsst.daf.butler.registry.Registry 

554 if isinstance(datasetType, DatasetType): 

555 storage = self._managers.datasets.find(datasetType.name) 

556 if storage is None: 

557 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

558 else: 

559 storage = self._managers.datasets.find(datasetType) 

560 if storage is None: 

561 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.") 

562 if run is None: 

563 if self.defaults.run is None: 

564 raise NoDefaultCollectionError( 

565 "No run provided to insertDatasets, and no default from registry construction." 

566 ) 

567 run = self.defaults.run 

568 runRecord = self._managers.collections.find(run) 

569 if runRecord.type is not CollectionType.RUN: 

570 raise CollectionTypeError( 

571 f"Given collection is of type {runRecord.type.name}; RUN collection required." 

572 ) 

573 assert isinstance(runRecord, RunRecord) 

574 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

575 if expand: 

576 expandedDataIds = [ 

577 self.expandDataId(dataId, graph=storage.datasetType.dimensions) 

578 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") 

579 ] 

580 else: 

581 expandedDataIds = [ 

582 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds 

583 ] 

584 try: 

585 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode)) 

586 if self._managers.obscore: 

587 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

588 self._managers.obscore.add_datasets(refs, context) 

589 except sqlalchemy.exc.IntegrityError as err: 

590 raise ConflictingDefinitionError( 

591 "A database constraint failure was triggered by inserting " 

592 f"one or more datasets of type {storage.datasetType} into " 

593 f"collection '{run}'. " 

594 "This probably means a dataset with the same data ID " 

595 "and dataset type already exists, but it may also mean a " 

596 "dimension row is missing." 

597 ) from err 

598 return refs 

599 

600 @transactional 

601 def _importDatasets( 

602 self, 

603 datasets: Iterable[DatasetRef], 

604 expand: bool = True, 

605 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

606 reuseIds: bool = False, 

607 ) -> List[DatasetRef]: 

608 # Docstring inherited from lsst.daf.butler.registry.Registry 

609 datasets = list(datasets) 

610 if not datasets: 

611 # nothing to do 

612 return [] 

613 

614 # find dataset type 

615 datasetTypes = set(dataset.datasetType for dataset in datasets) 

616 if len(datasetTypes) != 1: 

617 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}") 

618 datasetType = datasetTypes.pop() 

619 

620 # get storage handler for this dataset type 

621 storage = self._managers.datasets.find(datasetType.name) 

622 if storage is None: 

623 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

624 

625 # find run name 

626 runs = set(dataset.run for dataset in datasets) 

627 if len(runs) != 1: 

628 raise ValueError(f"Multiple run names in input datasets: {runs}") 

629 run = runs.pop() 

630 if run is None: 

631 if self.defaults.run is None: 

632 raise NoDefaultCollectionError( 

633 "No run provided to _importDatasets, and no default from registry construction."

634 ) 

635 run = self.defaults.run 

636 

637 runRecord = self._managers.collections.find(run) 

638 if runRecord.type is not CollectionType.RUN: 

639 raise CollectionTypeError( 

640 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};" 

641 " RUN collection required." 

642 ) 

643 assert isinstance(runRecord, RunRecord) 

644 

645 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

646 if expand: 

647 expandedDatasets = [ 

648 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions)) 

649 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") 

650 ] 

651 else: 

652 expandedDatasets = [ 

653 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True) 

654 for dataset in datasets 

655 ] 

656 

657 try: 

658 refs = list(storage.import_(runRecord, expandedDatasets, idGenerationMode, reuseIds)) 

659 if self._managers.obscore: 

660 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

661 self._managers.obscore.add_datasets(refs, context) 

662 except sqlalchemy.exc.IntegrityError as err: 

663 raise ConflictingDefinitionError( 

664 "A database constraint failure was triggered by inserting " 

665 f"one or more datasets of type {storage.datasetType} into " 

666 f"collection '{run}'. " 

667 "This probably means a dataset with the same data ID " 

668 "and dataset type already exists, but it may also mean a " 

669 "dimension row is missing." 

670 ) from err 

671 return refs 

672 

673 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]: 

674 # Docstring inherited from lsst.daf.butler.registry.Registry 

675 return self._managers.datasets.getDatasetRef(id) 

676 

677 @transactional 

678 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

679 # Docstring inherited from lsst.daf.butler.registry.Registry 

680 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

681 for datasetType, refsForType in progress.iter_item_chunks( 

682 DatasetRef.groupByType(refs).items(), desc="Removing datasets by type" 

683 ): 

684 storage = self._managers.datasets[datasetType.name] 

685 try: 

686 storage.delete(refsForType) 

687 except sqlalchemy.exc.IntegrityError as err: 

688 raise OrphanedRecordError( 

689 "One or more datasets is still present in one or more Datastores." 

690 ) from err 

691 

692 @transactional 

693 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

694 # Docstring inherited from lsst.daf.butler.registry.Registry 

695 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

696 collectionRecord = self._managers.collections.find(collection) 

697 if collectionRecord.type is not CollectionType.TAGGED: 

698 raise CollectionTypeError( 

699 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED." 

700 ) 

701 for datasetType, refsForType in progress.iter_item_chunks( 

702 DatasetRef.groupByType(refs).items(), desc="Associating datasets by type" 

703 ): 

704 storage = self._managers.datasets[datasetType.name] 

705 try: 

706 storage.associate(collectionRecord, refsForType) 

707 if self._managers.obscore: 

708 # If a TAGGED collection is being monitored by the ObsCore

709 # manager then we may need to save the dataset.

710 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

711 self._managers.obscore.associate(refsForType, collectionRecord, context) 

712 except sqlalchemy.exc.IntegrityError as err: 

713 raise ConflictingDefinitionError( 

714 f"Constraint violation while associating dataset of type {datasetType.name} with " 

715 f"collection {collection}. This probably means that one or more datasets with the same " 

716 "dataset type and data ID already exist in the collection, but it may also indicate " 

717 "that the datasets do not exist." 

718 ) from err 

719 

720 @transactional 

721 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

722 # Docstring inherited from lsst.daf.butler.registry.Registry 

723 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

724 collectionRecord = self._managers.collections.find(collection) 

725 if collectionRecord.type is not CollectionType.TAGGED: 

726 raise CollectionTypeError( 

727 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED." 

728 ) 

729 for datasetType, refsForType in progress.iter_item_chunks( 

730 DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type" 

731 ): 

732 storage = self._managers.datasets[datasetType.name] 

733 storage.disassociate(collectionRecord, refsForType) 

734 if self._managers.obscore: 

735 self._managers.obscore.disassociate(refsForType, collectionRecord) 

736 

737 @transactional 

738 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

739 # Docstring inherited from lsst.daf.butler.registry.Registry 

740 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

741 collectionRecord = self._managers.collections.find(collection) 

742 for datasetType, refsForType in progress.iter_item_chunks( 

743 DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type" 

744 ): 

745 storage = self._managers.datasets[datasetType.name] 

746 storage.certify( 

747 collectionRecord, 

748 refsForType, 

749 timespan, 

750 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

751 ) 

752 

753 @transactional 

754 def decertify( 

755 self, 

756 collection: str, 

757 datasetType: Union[str, DatasetType], 

758 timespan: Timespan, 

759 *, 

760 dataIds: Optional[Iterable[DataId]] = None, 

761 ) -> None: 

762 # Docstring inherited from lsst.daf.butler.registry.Registry 

763 collectionRecord = self._managers.collections.find(collection) 

764 if isinstance(datasetType, str): 

765 storage = self._managers.datasets[datasetType] 

766 else: 

767 storage = self._managers.datasets[datasetType.name] 

768 standardizedDataIds = None 

769 if dataIds is not None: 

770 standardizedDataIds = [ 

771 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds 

772 ] 

773 storage.decertify( 

774 collectionRecord, 

775 timespan, 

776 dataIds=standardizedDataIds, 

777 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

778 ) 

779 

780 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

781 """Return an object that allows a new `Datastore` instance to 

782 communicate with this `Registry`. 

783 

784 Returns 

785 ------- 

786 manager : `DatastoreRegistryBridgeManager` 

787 Object that mediates communication between this `Registry` and its 

788 associated datastores. 

789 """ 

790 return self._managers.datastores 

791 

792 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

793 # Docstring inherited from lsst.daf.butler.registry.Registry 

794 return self._managers.datastores.findDatastores(ref) 

795 

796 def expandDataId( 

797 self, 

798 dataId: Optional[DataId] = None, 

799 *, 

800 graph: Optional[DimensionGraph] = None, 

801 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None, 

802 withDefaults: bool = True, 

803 **kwargs: Any, 

804 ) -> DataCoordinate: 

805 # Docstring inherited from lsst.daf.butler.registry.Registry 

806 if not withDefaults: 

807 defaults = None 

808 else: 

809 defaults = self.defaults.dataId 

810 try: 

811 standardized = DataCoordinate.standardize( 

812 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs 

813 ) 

814 except KeyError as exc: 

815 # This means that either a kwarg has an odd name or a required

816 # dimension is missing.

817 raise DimensionNameError(str(exc)) from exc 

818 if standardized.hasRecords(): 

819 return standardized 

820 if records is None: 

821 records = {} 

822 elif isinstance(records, NamedKeyMapping): 

823 records = records.byName() 

824 else: 

825 records = dict(records) 

826 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

827 records.update(dataId.records.byName()) 

828 keys = standardized.byName() 

829 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

830 for element in standardized.graph.primaryKeyTraversalOrder: 

831 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

832 if record is ...: 

833 if isinstance(element, Dimension) and keys.get(element.name) is None: 

834 if element in standardized.graph.required: 

835 raise DimensionNameError( 

836 f"No value or null value for required dimension {element.name}." 

837 ) 

838 keys[element.name] = None 

839 record = None 

840 else: 

841 storage = self._managers.dimensions[element] 

842 record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context) 

843 records[element.name] = record 

844 if record is not None: 

845 for d in element.implied: 

846 value = getattr(record, d.name) 

847 if keys.setdefault(d.name, value) != value: 

848 raise InconsistentDataIdError( 

849 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

850 f"but {element.name} implies {d.name}={value!r}." 

851 ) 

852 else: 

853 if element in standardized.graph.required: 

854 raise DataIdValueError( 

855 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

856 ) 

857 if element.alwaysJoin: 

858 raise InconsistentDataIdError( 

859 f"Could not fetch record for element {element.name} via keys {keys}, "

860 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

861 "related.", 

862 ) 

863 for d in element.implied: 

864 keys.setdefault(d.name, None) 

865 records.setdefault(d.name, None) 

866 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records) 

867 

868 def insertDimensionData( 

869 self, 

870 element: Union[DimensionElement, str], 

871 *data: Union[Mapping[str, Any], DimensionRecord], 

872 conform: bool = True, 

873 replace: bool = False, 

874 skip_existing: bool = False, 

875 ) -> None: 

876 # Docstring inherited from lsst.daf.butler.registry.Registry 

877 if conform: 

878 if isinstance(element, str): 

879 element = self.dimensions[element] 

880 records = [ 

881 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data 

882 ] 

883 else: 

884 # Ignore typing since caller said to trust them with conform=False. 

885 records = data # type: ignore 

886 storage = self._managers.dimensions[element] 

887 storage.insert(*records, replace=replace, skip_existing=skip_existing) 

888 

889 def syncDimensionData( 

890 self, 

891 element: Union[DimensionElement, str], 

892 row: Union[Mapping[str, Any], DimensionRecord], 

893 conform: bool = True, 

894 update: bool = False, 

895 ) -> Union[bool, Dict[str, Any]]: 

896 # Docstring inherited from lsst.daf.butler.registry.Registry 

897 if conform: 

898 if isinstance(element, str): 

899 element = self.dimensions[element] 

900 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

901 else: 

902 # Ignore typing since caller said to trust them with conform=False. 

903 record = row # type: ignore 

904 storage = self._managers.dimensions[element] 

905 return storage.sync(record, update=update) 

906 

907 def queryDatasetTypes( 

908 self, 

909 expression: Any = ..., 

910 *, 

911 components: Optional[bool] = None, 

912 missing: Optional[List[str]] = None, 

913 ) -> Iterable[DatasetType]: 

914 # Docstring inherited from lsst.daf.butler.registry.Registry 

915 wildcard = DatasetTypeWildcard.from_expression(expression) 

916 composition_dict = self._managers.datasets.resolve_wildcard( 

917 wildcard, 

918 components=components, 

919 missing=missing, 

920 ) 

921 result: list[DatasetType] = [] 

922 for parent_dataset_type, components_for_parent in composition_dict.items(): 

923 result.extend( 

924 parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type 

925 for c in components_for_parent 

926 ) 

927 return result 

928 

929 def queryCollections( 

930 self, 

931 expression: Any = ..., 

932 datasetType: Optional[DatasetType] = None, 

933 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(), 

934 flattenChains: bool = False, 

935 includeChains: Optional[bool] = None, 

936 ) -> Sequence[str]: 

937 # Docstring inherited from lsst.daf.butler.registry.Registry 

938 

939 # Right now the datasetType argument is completely ignored, but that

940 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

941 # ticket will take care of that. 

942 try: 

943 wildcard = CollectionWildcard.from_expression(expression) 

944 except TypeError as exc: 

945 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc 

946 collectionTypes = ensure_iterable(collectionTypes) 

947 return [ 

948 record.name 

949 for record in self._managers.collections.resolve_wildcard( 

950 wildcard, 

951 collection_types=frozenset(collectionTypes), 

952 flatten_chains=flattenChains, 

953 include_chains=includeChains, 

954 ) 

955 ] 

956 

957 def _makeQueryBuilder( 

958 self, 

959 summary: queries.QuerySummary, 

960 doomed_by: Iterable[str] = (), 

961 ) -> queries.QueryBuilder: 

962 """Return a `QueryBuilder` instance capable of constructing and 

963 managing more complex queries than those obtainable via `Registry` 

964 interfaces. 

965 

966 This is an advanced interface; downstream code should prefer 

967 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

968 are sufficient. 

969 

970 Parameters 

971 ---------- 

972 summary : `queries.QuerySummary` 

973 Object describing and categorizing the full set of dimensions that 

974 will be included in the query. 

975 doomed_by : `Iterable` of `str`, optional 

976 A list of diagnostic messages that indicate why the query is going 

977 to yield no results and should not even be executed. If an empty 

978 container (default), the query will be executed unless other code

979 determines that it is doomed. 

980 

981 Returns 

982 ------- 

983 builder : `queries.QueryBuilder` 

984 Object that can be used to construct and perform advanced queries. 

985 """ 

986 doomed_by = list(doomed_by) 

987 backend = queries.SqlQueryBackend(self._db, self._managers) 

988 context = backend.context() 

989 relation: Relation | None = None 

990 if doomed_by: 

991 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by) 

992 return queries.QueryBuilder( 

993 summary, 

994 backend=backend, 

995 context=context, 

996 relation=relation, 

997 ) 

998 
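# Illustrative usage (not part of the original module): driving the advanced
# query-builder interface directly, mirroring how queryDataIds uses it below.
# The dimension names are hypothetical.
#
#     summary = queries.QuerySummary(
#         requested=registry.dimensions.extract(["instrument", "visit"]),
#         defaults=registry.defaults.dataId,
#     )
#     builder = registry._makeQueryBuilder(summary)
#     query = builder.finish()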

999 def _standardize_query_data_id_args( 

1000 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any 

1001 ) -> DataCoordinate: 

1002 """Preprocess the data ID arguments passed to query* methods. 

1003 

1004 Parameters 

1005 ---------- 

1006 data_id : `DataId` or `None` 

1007 Data ID that constrains the query results. 

1008 doomed_by : `list` [ `str` ] 

1009 List to append messages indicating why the query is doomed to 

1010 yield no results. 

1011 **kwargs 

1012 Additional data ID key-value pairs, extending and overriding 

1013 ``data_id``. 

1014 

1015 Returns 

1016 ------- 

1017 data_id : `DataCoordinate` 

1018 Standardized data ID. Will be fully expanded unless expansion 

1019 fails, in which case a message will be appended to ``doomed_by`` 

1020 on return. 

1021 """ 

1022 try: 

1023 return self.expandDataId(data_id, **kwargs) 

1024 except DataIdValueError as err: 

1025 doomed_by.append(str(err)) 

1026 return DataCoordinate.standardize( 

1027 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId 

1028 ) 

1029 

1030 def _standardize_query_dataset_args( 

1031 self, 

1032 datasets: Any, 

1033 collections: Any, 

1034 components: bool | None, 

1035 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain", 

1036 *, 

1037 doomed_by: list[str], 

1038 ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]: 

1039 """Preprocess dataset arguments passed to query* methods. 

1040 

1041 Parameters 

1042 ---------- 

1043 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these 

1044 Expression identifying dataset types. See `queryDatasetTypes` for 

1045 details. 

1046 collections : `str`, `re.Pattern`, or iterable of these 

1047 Expression identifying collections to be searched. See 

1048 `queryCollections` for details. 

1049 components : `bool`, optional 

1050 If `True`, apply all expression patterns to component dataset type 

1051 names as well. If `False`, never apply patterns to components. 

1052 If `None` (default), apply patterns to components only if their 

1053 parent datasets were not matched by the expression. 

1054 Fully-specified component datasets (`str` or `DatasetType` 

1055 instances) are always included. 

1056 

1057 Values other than `False` are deprecated, and only `False` will be 

1058 supported after v26. After v27 this argument will be removed 

1059 entirely. 

1060 mode : `str`, optional 

1061 The way in which datasets are being used in this query; one of: 

1062 

1063 - "find_first": this is a query for the first dataset in an 

1064 ordered list of collections. Prohibits collection wildcards, 

1065 but permits dataset type wildcards. 

1066 

1067 - "find_all": this is a query for all datasets in all matched 

1068 collections. Permits collection and dataset type wildcards. 

1069 

1070 - "constrain": this is a query for something other than datasets, 

1071 with results constrained by dataset existence. Permits 

1072 collection wildcards and prohibits ``...`` as a dataset type 

1073 wildcard. 

1074 doomed_by : `list` [ `str` ] 

1075 List to append messages indicating why the query is doomed to 

1076 yield no results. 

1077 

1078 Returns 

1079 ------- 

1080 composition : `defaultdict` [ `DatasetType`, `list` [ `str` ] ] 

1081 Dictionary mapping parent dataset type to `list` of components 

1082 matched for that dataset type (or `None` for the parent itself). 

1083 collections : `CollectionWildcard` 

1084 Processed collection expression. 

1085 """ 

1086 composition: dict[DatasetType, list[str | None]] = {} 

1087 if datasets is not None: 

1088 if not collections: 

1089 if not self.defaults.collections: 

1090 raise NoDefaultCollectionError("No collections, and no registry default collections.") 

1091 collections = self.defaults.collections 

1092 else: 

1093 collections = CollectionWildcard.from_expression(collections) 

1094 if mode == "find_first" and collections.patterns: 

1095 raise TypeError( 

1096 f"Collection pattern(s) {collections.patterns} not allowed in this context." 

1097 ) 

1098 missing: list[str] = [] 

1099 composition = self._managers.datasets.resolve_wildcard( 

1100 datasets, components=components, missing=missing, explicit_only=(mode == "constrain") 

1101 ) 

1102 if missing and mode == "constrain": 

1103 # After v26 this should raise MissingDatasetTypeError, to be 

1104 # implemented on DM-36303. 

1105 warnings.warn( 

1106 f"Dataset type(s) {missing} are not registered; this will be an error after v26.", 

1107 FutureWarning, 

1108 ) 

1109 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing) 

1110 elif collections: 

1111 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") 

1112 return composition, collections 

1113 

1114 def queryDatasets( 

1115 self, 

1116 datasetType: Any, 

1117 *, 

1118 collections: Any = None, 

1119 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

1120 dataId: Optional[DataId] = None, 

1121 where: str = "", 

1122 findFirst: bool = False, 

1123 components: Optional[bool] = None, 

1124 bind: Optional[Mapping[str, Any]] = None, 

1125 check: bool = True, 

1126 **kwargs: Any, 

1127 ) -> queries.DatasetQueryResults: 

1128 # Docstring inherited from lsst.daf.butler.registry.Registry 

1129 doomed_by: list[str] = [] 

1130 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1131 dataset_composition, collections = self._standardize_query_dataset_args( 

1132 datasetType, 

1133 collections, 

1134 components, 

1135 mode="find_first" if findFirst else "find_all", 

1136 doomed_by=doomed_by, 

1137 ) 

1138 parent_results: list[queries.ParentDatasetQueryResults] = [] 

1139 for parent_dataset_type, components_for_parent in dataset_composition.items(): 

1140 # The full set of dimensions in the query is the combination of 

1141 # those needed for the DatasetType and those explicitly requested, 

1142 # if any. 

1143 dimension_names = set(parent_dataset_type.dimensions.names) 

1144 if dimensions is not None: 

1145 dimension_names.update(self.dimensions.extract(dimensions).names) 

1146 # Construct the summary structure needed to construct a 

1147 # QueryBuilder. 

1148 summary = queries.QuerySummary( 

1149 requested=DimensionGraph(self.dimensions, names=dimension_names), 

1150 data_id=data_id, 

1151 expression=where, 

1152 bind=bind, 

1153 defaults=self.defaults.dataId, 

1154 check=check, 

1155 datasets=[parent_dataset_type], 

1156 ) 

1157 builder = self._makeQueryBuilder(summary) 

1158 # Add the dataset subquery to the query, telling the QueryBuilder 

1159 # to include the rank of the selected collection in the results 

1160 # only if we need to findFirst. Note that if any of the 

1161 # collections are actually wildcard expressions, and 

1162 # findFirst=True, this will raise TypeError for us. 

1163 builder.joinDataset(parent_dataset_type, collections, isResult=True, findFirst=findFirst) 

1164 query = builder.finish() 

1165 parent_results.append( 

1166 queries.ParentDatasetQueryResults( 

1167 query, parent_dataset_type, components=components_for_parent 

1168 ) 

1169 ) 

1170 if not parent_results: 

1171 doomed_by.extend( 

1172 f"No registered dataset type matching {t!r} found, so no matching datasets can " 

1173 "exist in any collection." 

1174 for t in ensure_iterable(datasetType) 

1175 ) 

1176 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

1177 elif len(parent_results) == 1: 

1178 return parent_results[0] 

1179 else: 

1180 return queries.ChainedDatasetQueryResults(parent_results) 

1181 
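# Illustrative usage (not part of the original module): a typical find-first
# dataset query. The dataset type, collection, and where-clause values are
# hypothetical.
#
#     refs = registry.queryDatasets(
#         "calexp",
#         collections=["HSC/runs/RC2"],
#         where="instrument = 'HSC' AND visit = 903334 AND detector = 10",
#         findFirst=True,
#     )
#     for ref in refs:
#         print(ref.dataId, ref.run)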

1182 def queryDataIds( 

1183 self, 

1184 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], 

1185 *, 

1186 dataId: Optional[DataId] = None, 

1187 datasets: Any = None, 

1188 collections: Any = None, 

1189 where: str = "", 

1190 components: Optional[bool] = None, 

1191 bind: Optional[Mapping[str, Any]] = None, 

1192 check: bool = True, 

1193 **kwargs: Any, 

1194 ) -> queries.DataCoordinateQueryResults: 

1195 # Docstring inherited from lsst.daf.butler.registry.Registry 

1196 dimensions = ensure_iterable(dimensions) 

1197 requestedDimensions = self.dimensions.extract(dimensions) 

1198 doomed_by: list[str] = [] 

1199 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1200 dataset_composition, collections = self._standardize_query_dataset_args( 

1201 datasets, collections, components, doomed_by=doomed_by 

1202 ) 

1203 summary = queries.QuerySummary( 

1204 requested=requestedDimensions, 

1205 data_id=data_id, 

1206 expression=where, 

1207 bind=bind, 

1208 defaults=self.defaults.dataId, 

1209 check=check, 

1210 datasets=dataset_composition.keys(), 

1211 ) 

1212 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

1213 for datasetType in dataset_composition.keys(): 

1214 builder.joinDataset(datasetType, collections, isResult=False) 

1215 query = builder.finish() 

1216 

1217 return queries.DataCoordinateQueryResults(query) 

1218 

1219 def queryDimensionRecords( 

1220 self, 

1221 element: Union[DimensionElement, str], 

1222 *, 

1223 dataId: Optional[DataId] = None, 

1224 datasets: Any = None, 

1225 collections: Any = None, 

1226 where: str = "", 

1227 components: Optional[bool] = None, 

1228 bind: Optional[Mapping[str, Any]] = None, 

1229 check: bool = True, 

1230 **kwargs: Any, 

1231 ) -> queries.DimensionRecordQueryResults: 

1232 # Docstring inherited from lsst.daf.butler.registry.Registry 

1233 if not isinstance(element, DimensionElement): 

1234 try: 

1235 element = self.dimensions[element] 

1236 except KeyError as e: 

1237 raise DimensionNameError( 

1238 f"No such dimension '{element}', available dimensions: " 

1239 + str(self.dimensions.getStaticElements()) 

1240 ) from e 

1241 doomed_by: list[str] = [] 

1242 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

1243 dataset_composition, collections = self._standardize_query_dataset_args( 

1244 datasets, collections, components, doomed_by=doomed_by 

1245 ) 

1246 summary = queries.QuerySummary( 

1247 requested=element.graph, 

1248 data_id=data_id, 

1249 expression=where, 

1250 bind=bind, 

1251 defaults=self.defaults.dataId, 

1252 check=check, 

1253 datasets=dataset_composition.keys(), 

1254 ) 

1255 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

1256 for datasetType in dataset_composition.keys(): 

1257 builder.joinDataset(datasetType, collections, isResult=False) 

1258 query = builder.finish().with_record_columns(element) 

1259 return queries.DatabaseDimensionRecordQueryResults(query, element) 

1260 

1261 def queryDatasetAssociations( 

1262 self, 

1263 datasetType: Union[str, DatasetType], 

1264 collections: Any = ..., 

1265 *, 

1266 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

1267 flattenChains: bool = False, 

1268 ) -> Iterator[DatasetAssociation]: 

1269 # Docstring inherited from lsst.daf.butler.registry.Registry 

1270 if collections is None: 

1271 if not self.defaults.collections: 

1272 raise NoDefaultCollectionError( 

1273 "No collections provided to queryDatasetAssociations, " 

1274 "and no defaults from registry construction." 

1275 ) 

1276 collections = self.defaults.collections 

1277 collections = CollectionWildcard.from_expression(collections) 

1278 backend = queries.SqlQueryBackend(self._db, self._managers) 

1279 parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False) 

1280 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan") 

1281 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

1282 for parent_collection_record in backend.resolve_collection_wildcard( 

1283 collections, 

1284 collection_types=frozenset(collectionTypes), 

1285 flatten_chains=flattenChains, 

1286 ): 

1287 # Resolve this possibly-chained collection into a list of 

1288 # non-CHAINED collections that actually hold datasets of this 

1289 # type. 

1290 candidate_collection_records = backend.resolve_dataset_collections( 

1291 parent_dataset_type, 

1292 CollectionWildcard.from_names([parent_collection_record.name]), 

1293 allow_calibration_collections=True, 

1294 governor_constraints={}, 

1295 ) 

1296 if not candidate_collection_records: 

1297 continue 

1298 with backend.context() as context: 

1299 relation = backend.make_dataset_query_relation( 

1300 parent_dataset_type, 

1301 candidate_collection_records, 

1302 columns={"dataset_id", "run", "timespan", "collection"}, 

1303 context=context, 

1304 ) 

1305 reader = queries.DatasetRefReader( 

1306 parent_dataset_type, 

1307 translate_collection=lambda k: self._managers.collections[k].name, 

1308 full=False, 

1309 ) 

1310 for row in context.fetch_iterable(relation): 

1311 ref = reader.read(row) 

1312 collection_record = self._managers.collections[row[collection_tag]] 

1313 if collection_record.type is CollectionType.CALIBRATION: 

1314 timespan = row[timespan_tag] 

1315 else: 

1316 # For backwards compatibility and (possibly?) user 

1317 # convenience we continue to define the timespan of a 

1318 # DatasetAssociation row for a non-CALIBRATION 

1319 # collection to be None rather than a fully unbounded 

1320 # timespan. 

1321 timespan = None 

1322 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan) 

1323 
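# Illustrative usage (not part of the original module): listing where datasets
# of a given type are tagged or certified. The dataset type name and
# collection pattern are hypothetical.
#
#     for assoc in registry.queryDatasetAssociations("bias", collections="HSC/calib*"):
#         print(assoc.collection, assoc.ref.dataId, assoc.timespan)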

1324 storageClasses: StorageClassFactory 

1325 """All storage classes known to the registry (`StorageClassFactory`). 

1326 """