Coverage for python/lsst/daf/butler/registries/sql.py: 12%

513 statements  

coverage.py v7.2.7, created at 2023-06-02 02:16 -0700

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("SqlRegistry",)

import contextlib
import logging
import warnings
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Iterable,
    Iterator,
    List,
    Literal,
    Mapping,
    Optional,
    Sequence,
    Set,
    Union,
    cast,
)

import sqlalchemy
from lsst.daf.relation import LeafRelation, Relation
from lsst.resources import ResourcePathExpression
from lsst.utils.ellipsis import Ellipsis
from lsst.utils.iteration import ensure_iterable

from ..core import (
    Config,
    DataCoordinate,
    DataId,
    DatasetAssociation,
    DatasetColumnTag,
    DatasetId,
    DatasetIdFactory,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionConfig,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    NamedKeyMapping,
    NameLookupMapping,
    Progress,
    StorageClassFactory,
    Timespan,
    ddl,
)
from ..core.utils import transactional
from ..registry import (
    ArgumentError,
    CollectionExpressionError,
    CollectionSummary,
    CollectionType,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    DimensionNameError,
    InconsistentDataIdError,
    NoDefaultCollectionError,
    OrphanedRecordError,
    Registry,
    RegistryConfig,
    RegistryConsistencyError,
    RegistryDefaults,
    queries,
)
from ..registry.interfaces import ChainedCollectionRecord, RunRecord
from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes
from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard

if TYPE_CHECKING:
    from .._butlerConfig import ButlerConfig
    from ..registry._registry import CollectionArgType
    from ..registry.interfaces import (
        CollectionRecord,
        Database,
        DatastoreRegistryBridgeManager,
        ObsCoreTableManager,
    )


_LOG = logging.getLogger(__name__)


class SqlRegistry(Registry):
    """Registry implementation based on SQLAlchemy.

    Parameters
    ----------
    database : `Database`
        Database instance to store Registry.
    defaults : `RegistryDefaults`
        Default collection search path and/or output `~CollectionType.RUN`
        collection.
    managers : `RegistryManagerInstances`
        All the managers required for this registry.
    """

    defaultConfigFile: Optional[str] = None
    """Path to configuration defaults. Accessed within the ``configs`` resource
    or relative to a search path. Can be None if no defaults specified.
    """

    @classmethod
    def createFromConfig(
        cls,
        config: Optional[Union[RegistryConfig, str]] = None,
        dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ) -> Registry:
        """Create registry database and return `SqlRegistry` instance.

        This method initializes database contents; the database must be empty
        prior to calling this method.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration; if missing, the default configuration will
            be loaded from registry.yaml.
        dimensionConfig : `DimensionConfig` or `str`, optional
            Dimensions configuration; if missing, the default configuration
            will be loaded from dimensions.yaml.
        butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
            Path to the repository root this `SqlRegistry` will manage.

        Returns
        -------
        registry : `SqlRegistry`
            A new `SqlRegistry` instance.
        """
        config = cls.forceRegistryConfig(config)
        config.replaceRoot(butlerRoot)

        if isinstance(dimensionConfig, str):
            dimensionConfig = DimensionConfig(dimensionConfig)
        elif dimensionConfig is None:
            dimensionConfig = DimensionConfig()
        elif not isinstance(dimensionConfig, DimensionConfig):
            raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}")

        DatabaseClass = config.getDatabaseClass()
        database = DatabaseClass.fromUri(
            config.connectionString, origin=config.get("origin", 0), namespace=config.get("namespace")
        )
        managerTypes = RegistryManagerTypes.fromConfig(config)
        managers = managerTypes.makeRepo(database, dimensionConfig)
        return cls(database, RegistryDefaults(), managers)

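    # A minimal usage sketch for the constructors above (an illustrative
    # assumption, not part of the original module): create a new SQLite-backed
    # registry and then re-open it read-only. The connection string and root
    # are made up.
    #
    #     from lsst.daf.butler.registry import RegistryConfig
    #
    #     config = RegistryConfig({"db": "sqlite:///./registry.sqlite3"})
    #     registry = SqlRegistry.createFromConfig(config, butlerRoot=".")
    #     reopened = SqlRegistry.fromConfig(config, butlerRoot=".", writeable=False)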

    @classmethod
    def fromConfig(
        cls,
        config: Union[ButlerConfig, RegistryConfig, Config, str],
        butlerRoot: Optional[ResourcePathExpression] = None,
        writeable: bool = True,
        defaults: Optional[RegistryDefaults] = None,
    ) -> Registry:
        """Create `Registry` subclass instance from `config`.

        Registry database must be initialized prior to calling this method.

        Parameters
        ----------
        config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
            Registry configuration.
        butlerRoot : `lsst.resources.ResourcePathExpression`, optional
            Path to the repository root this `Registry` will manage.
        writeable : `bool`, optional
            If `True` (default), create a read-write connection to the
            database.
        defaults : `RegistryDefaults`, optional
            Default collection search path and/or output `~CollectionType.RUN`
            collection.

        Returns
        -------
        registry : `SqlRegistry` (subclass)
            A new `SqlRegistry` subclass instance.
        """
        config = cls.forceRegistryConfig(config)
        config.replaceRoot(butlerRoot)
        DatabaseClass = config.getDatabaseClass()
        database = DatabaseClass.fromUri(
            config.connectionString,
            origin=config.get("origin", 0),
            namespace=config.get("namespace"),
            writeable=writeable,
        )
        managerTypes = RegistryManagerTypes.fromConfig(config)
        with database.session():
            managers = managerTypes.loadRepo(database)
        if defaults is None:
            defaults = RegistryDefaults()
        return cls(database, defaults, managers)

    def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances):
        self._db = database
        self._managers = managers
        self.storageClasses = StorageClassFactory()
        # Intentionally invoke property setter to initialize defaults. This
        # can only be done after most of the rest of Registry has already been
        # initialized, and must be done before the property getter is used.
        self.defaults = defaults
        # In the future DatasetIdFactory may become configurable and this
        # instance will need to be shared with datasets manager.
        self.datasetIdFactory = DatasetIdFactory()

    def __str__(self) -> str:
        return str(self._db)

    def __repr__(self) -> str:
        return f"SqlRegistry({self._db!r}, {self.dimensions!r})"

    def isWriteable(self) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._db.isWriteable()

    def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if defaults is None:
            # No need to copy, because `RegistryDefaults` is immutable; we
            # effectively copy on write.
            defaults = self.defaults
        return type(self)(self._db, defaults, self._managers)

    @property
    def dimensions(self) -> DimensionUniverse:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.dimensions.universe

    def refresh(self) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        with self._db.transaction():
            self._managers.refresh()

    @contextlib.contextmanager
    def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        try:
            with self._db.transaction(savepoint=savepoint):
                yield
        except BaseException:
            # TODO: this clears the caches sometimes when we wouldn't actually
            # need to. Can we avoid that?
            self._managers.dimensions.clearCaches()
            raise

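    # A short usage sketch for the transaction context manager above (an
    # illustrative assumption, not part of the original module): group several
    # registry writes so they succeed or fail together. The collection and
    # record values are made up.
    #
    #     with registry.transaction(savepoint=True):
    #         registry.registerRun("u/someone/example-run")
    #         registry.insertDimensionData("instrument", {"name": "DummyCam"})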

    def resetConnectionPool(self) -> None:
        """Reset SQLAlchemy connection pool for `SqlRegistry` database.

        This operation is useful when using registry with fork-based
        multiprocessing. To use the registry across a fork boundary, one has
        to make sure that there are no currently active connections (no
        session or transaction is in progress) and that the connection pool is
        reset using this method. This method should be called by the child
        process immediately after the fork.
        """
        self._db._engine.dispose()

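    # A fork-safety sketch building on the note above (an illustrative
    # assumption, not part of the original module): reset the pool in each
    # child process before it touches the registry.
    #
    #     import os
    #
    #     pid = os.fork()
    #     if pid == 0:  # child process
    #         registry.resetConnectionPool()
    #         ...  # safe to run registry queries here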

    def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
        """Add an opaque (to the `Registry`) table for use by a `Datastore` or
        other data repository client.

        Opaque table records can be added via `insertOpaqueData`, retrieved via
        `fetchOpaqueData`, and removed via `deleteOpaqueData`.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. This may differ from the
            actual name used in the database by a prefix and/or suffix.
        spec : `ddl.TableSpec`
            Specification for the table to be added.
        """
        self._managers.opaque.register(tableName, spec)

    @transactional
    def insertOpaqueData(self, tableName: str, *data: dict) -> None:
        """Insert records into an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        data
            Each additional positional argument is a dictionary that represents
            a single row to be added.
        """
        self._managers.opaque[tableName].insert(*data)

    def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]:
        """Retrieve records from an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        where
            Additional keyword arguments are interpreted as equality
            constraints that restrict the returned rows (combined with AND);
            keyword arguments are column names and values are the values they
            must have.

        Yields
        ------
        row : `dict`
            A dictionary representing a single result row.
        """
        yield from self._managers.opaque[tableName].fetch(**where)

    @transactional
    def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
        """Remove records from an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        where
            Additional keyword arguments are interpreted as equality
            constraints that restrict the deleted rows (combined with AND);
            keyword arguments are column names and values are the values they
            must have.
        """
        self._managers.opaque[tableName].delete(where.keys(), where)

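    # A small end-to-end sketch of the opaque-table API above (an illustrative
    # assumption, not part of the original module; the table name and columns
    # are made up).
    #
    #     from lsst.daf.butler import ddl
    #
    #     spec = ddl.TableSpec(
    #         fields=[
    #             ddl.FieldSpec("path", dtype=sqlalchemy.String, length=256, primaryKey=True),
    #             ddl.FieldSpec("checksum", dtype=sqlalchemy.String, length=64),
    #         ]
    #     )
    #     registry.registerOpaqueTable("datastore_files", spec)
    #     registry.insertOpaqueData("datastore_files", {"path": "a/b.fits", "checksum": "abc123"})
    #     rows = list(registry.fetchOpaqueData("datastore_files", path="a/b.fits"))
    #     registry.deleteOpaqueData("datastore_files", path="a/b.fits")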

    def registerCollection(
        self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None
    ) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, registered = self._managers.collections.register(name, type, doc=doc)
        return registered

    def getCollectionType(self, name: str) -> CollectionType:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.find(name).type

    def _get_collection_record(self, name: str) -> CollectionRecord:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.find(name)

    def registerRun(self, name: str, doc: Optional[str] = None) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc)
        return registered

    @transactional
    def removeCollection(self, name: str) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.collections.remove(name)

    def getCollectionChain(self, parent: str) -> tuple[str, ...]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(parent)
        if record.type is not CollectionType.CHAINED:
            raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
        assert isinstance(record, ChainedCollectionRecord)
        return record.children

    @transactional
    def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(parent)
        if record.type is not CollectionType.CHAINED:
            raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
        assert isinstance(record, ChainedCollectionRecord)
        children = CollectionWildcard.from_expression(children).require_ordered()
        if children != record.children or flatten:
            record.update(self._managers.collections, children, flatten=flatten)

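    # A brief sketch of chained-collection management with the methods above
    # (an illustrative assumption, not part of the original module; collection
    # names are made up).
    #
    #     registry.registerRun("u/someone/run1")
    #     registry.registerRun("u/someone/run2")
    #     registry.registerCollection("u/someone/chain", CollectionType.CHAINED)
    #     registry.setCollectionChain("u/someone/chain", ["u/someone/run2", "u/someone/run1"])
    #     assert registry.getCollectionChain("u/someone/chain") == ("u/someone/run2", "u/someone/run1")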

    def getCollectionParentChains(self, collection: str) -> Set[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return {
            record.name
            for record in self._managers.collections.getParentChains(
                self._managers.collections.find(collection).key
            )
        }

    def getCollectionDocumentation(self, collection: str) -> Optional[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key)

    def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc)

    def getCollectionSummary(self, collection: str) -> CollectionSummary:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(collection)
        return self._managers.datasets.getCollectionSummary(record)

    def registerDatasetType(self, datasetType: DatasetType) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, inserted = self._managers.datasets.register(datasetType)
        return inserted

    def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry

        for datasetTypeExpression in ensure_iterable(name):
            datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression))
            if not datasetTypes:
                _LOG.info("Dataset type %r not defined", datasetTypeExpression)
            else:
                for datasetType in datasetTypes:
                    self._managers.datasets.remove(datasetType.name)
                    _LOG.info("Removed dataset type %r", datasetType.name)

    def getDatasetType(self, name: str) -> DatasetType:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        parent_name, component = DatasetType.splitDatasetTypeName(name)
        storage = self._managers.datasets[parent_name]
        if component is None:
            return storage.datasetType
        else:
            return storage.datasetType.makeComponentDatasetType(component)

    def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datasets.supportsIdGenerationMode(mode)

    def findDataset(
        self,
        datasetType: Union[DatasetType, str],
        dataId: Optional[DataId] = None,
        *,
        collections: CollectionArgType | None = None,
        timespan: Optional[Timespan] = None,
        **kwargs: Any,
    ) -> Optional[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if collections is None:
            if not self.defaults.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to findDataset, and no defaults from registry construction."
                )
            collections = self.defaults.collections
        backend = queries.SqlQueryBackend(self._db, self._managers)
        collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True)
        if collection_wildcard.empty():
            return None
        matched_collections = backend.resolve_collection_wildcard(collection_wildcard)
        parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard(
            datasetType, components_deprecated=False
        )
        if len(components) > 1:
            raise DatasetTypeError(
                f"findDataset requires exactly one dataset type; got multiple components {components} "
                f"for parent dataset type {parent_dataset_type.name}."
            )
        component = components[0]
        dataId = DataCoordinate.standardize(
            dataId,
            graph=parent_dataset_type.dimensions,
            universe=self.dimensions,
            defaults=self.defaults.dataId,
            **kwargs,
        )
        governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names}
        (filtered_collections,) = backend.filter_dataset_collections(
            [parent_dataset_type],
            matched_collections,
            governor_constraints=governor_constraints,
        ).values()
        if not filtered_collections:
            return None
        if timespan is None:
            filtered_collections = [
                collection_record
                for collection_record in filtered_collections
                if collection_record.type is not CollectionType.CALIBRATION
            ]
        if filtered_collections:
            requested_columns = {"dataset_id", "run", "collection"}
            with backend.context() as context:
                predicate = context.make_data_coordinate_predicate(
                    dataId.subset(parent_dataset_type.dimensions), full=False
                )
                if timespan is not None:
                    requested_columns.add("timespan")
                    predicate = predicate.logical_and(
                        context.make_timespan_overlap_predicate(
                            DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan
                        )
                    )
                relation = backend.make_dataset_query_relation(
                    parent_dataset_type, filtered_collections, requested_columns, context
                ).with_rows_satisfying(predicate)
                rows = list(context.fetch_iterable(relation))
        else:
            rows = []
        if not rows:
            return None
        elif len(rows) == 1:
            best_row = rows[0]
        else:
            rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)}
            collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
            row_iter = iter(rows)
            best_row = next(row_iter)
            best_rank = rank_by_collection_key[best_row[collection_tag]]
            have_tie = False
            for row in row_iter:
                if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank:
                    best_row = row
                    best_rank = rank
                    have_tie = False
                elif rank == best_rank:
                    have_tie = True
                    assert timespan is not None, "Rank ties should be impossible given DB constraints."
            if have_tie:
                raise LookupError(
                    f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections "
                    f"{collection_wildcard.strings} with timespan {timespan}."
                )
        reader = queries.DatasetRefReader(
            parent_dataset_type,
            translate_collection=lambda k: self._managers.collections[k].name,
        )
        ref = reader.read(best_row, data_id=dataId)
        if component is not None:
            ref = ref.makeComponentRef(component)
        return ref

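    # A short lookup sketch for findDataset above (an illustrative assumption,
    # not part of the original module; the dataset type, data ID, and
    # collection name are made up).
    #
    #     ref = registry.findDataset(
    #         "calexp",
    #         instrument="DummyCam",
    #         visit=42,
    #         detector=0,
    #         collections="u/someone/run1",
    #     )
    #     if ref is not None:
    #         print(ref.id, ref.run)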

    @transactional
    def insertDatasets(
        self,
        datasetType: Union[DatasetType, str],
        dataIds: Iterable[DataId],
        run: Optional[str] = None,
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> List[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if isinstance(datasetType, DatasetType):
            storage = self._managers.datasets.find(datasetType.name)
            if storage is None:
                raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
        else:
            storage = self._managers.datasets.find(datasetType)
            if storage is None:
                raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.")
        if run is None:
            if self.defaults.run is None:
                raise NoDefaultCollectionError(
                    "No run provided to insertDatasets, and no default from registry construction."
                )
            run = self.defaults.run
        runRecord = self._managers.collections.find(run)
        if runRecord.type is not CollectionType.RUN:
            raise CollectionTypeError(
                f"Given collection is of type {runRecord.type.name}; RUN collection required."
            )
        assert isinstance(runRecord, RunRecord)
        progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
        if expand:
            expandedDataIds = [
                self.expandDataId(dataId, graph=storage.datasetType.dimensions)
                for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs")
            ]
        else:
            expandedDataIds = [
                DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds
            ]
        try:
            refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode))
            if self._managers.obscore:
                context = queries.SqlQueryContext(self._db, self._managers.column_types)
                self._managers.obscore.add_datasets(refs, context)
        except sqlalchemy.exc.IntegrityError as err:
            raise ConflictingDefinitionError(
                "A database constraint failure was triggered by inserting "
                f"one or more datasets of type {storage.datasetType} into "
                f"collection '{run}'. "
                "This probably means a dataset with the same data ID "
                "and dataset type already exists, but it may also mean a "
                "dimension row is missing."
            ) from err
        return refs

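    # A registration-and-insert sketch for insertDatasets above (an
    # illustrative assumption, not part of the original module; the dataset
    # type, storage class, and data IDs are made up).
    #
    #     from lsst.daf.butler import DatasetType
    #
    #     dataset_type = DatasetType(
    #         "rawSketch",
    #         dimensions=["instrument", "detector"],
    #         storageClass="Exposure",
    #         universe=registry.dimensions,
    #     )
    #     registry.registerDatasetType(dataset_type)
    #     refs = registry.insertDatasets(
    #         dataset_type,
    #         dataIds=[{"instrument": "DummyCam", "detector": 0}],
    #         run="u/someone/run1",
    #     )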

    @transactional
    def _importDatasets(
        self,
        datasets: Iterable[DatasetRef],
        expand: bool = True,
    ) -> List[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        datasets = list(datasets)
        if not datasets:
            # nothing to do
            return []

        # find dataset type
        datasetTypes = set(dataset.datasetType for dataset in datasets)
        if len(datasetTypes) != 1:
            raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}")
        datasetType = datasetTypes.pop()

        # get storage handler for this dataset type
        storage = self._managers.datasets.find(datasetType.name)
        if storage is None:
            raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")

        # find run name
        runs = set(dataset.run for dataset in datasets)
        if len(runs) != 1:
            raise ValueError(f"Multiple run names in input datasets: {runs}")
        run = runs.pop()

        runRecord = self._managers.collections.find(run)
        if runRecord.type is not CollectionType.RUN:
            raise CollectionTypeError(
                f"Given collection '{runRecord.name}' is of type {runRecord.type.name};"
                " RUN collection required."
            )
        assert isinstance(runRecord, RunRecord)

        progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
        if expand:
            expandedDatasets = [
                dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions))
                for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs")
            ]
        else:
            expandedDatasets = [
                DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True)
                for dataset in datasets
            ]

        try:
            refs = list(storage.import_(runRecord, expandedDatasets))
            if self._managers.obscore:
                context = queries.SqlQueryContext(self._db, self._managers.column_types)
                self._managers.obscore.add_datasets(refs, context)
        except sqlalchemy.exc.IntegrityError as err:
            raise ConflictingDefinitionError(
                "A database constraint failure was triggered by inserting "
                f"one or more datasets of type {storage.datasetType} into "
                f"collection '{run}'. "
                "This probably means a dataset with the same data ID "
                "and dataset type already exists, but it may also mean a "
                "dimension row is missing."
            ) from err
        # Check that imported dataset IDs match the input
        for imported_ref, input_ref in zip(refs, datasets):
            if imported_ref.id != input_ref.id:
                raise RegistryConsistencyError(
                    "Imported dataset ID differs from input dataset ID, "
                    f"input ref: {input_ref}, imported ref: {imported_ref}"
                )
        return refs

    def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datasets.getDatasetRef(id)

    @transactional
    def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG)
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.groupByType(refs).items(), desc="Removing datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            try:
                storage.delete(refsForType)
            except sqlalchemy.exc.IntegrityError as err:
                raise OrphanedRecordError(
                    "One or more datasets is still present in one or more Datastores."
                ) from err

    @transactional
    def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        if collectionRecord.type is not CollectionType.TAGGED:
            raise CollectionTypeError(
                f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED."
            )
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.groupByType(refs).items(), desc="Associating datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            try:
                storage.associate(collectionRecord, refsForType)
                if self._managers.obscore:
                    # If a TAGGED collection is being monitored by ObsCore
                    # manager then we may need to save the dataset.
                    context = queries.SqlQueryContext(self._db, self._managers.column_types)
                    self._managers.obscore.associate(refsForType, collectionRecord, context)
            except sqlalchemy.exc.IntegrityError as err:
                raise ConflictingDefinitionError(
                    f"Constraint violation while associating dataset of type {datasetType.name} with "
                    f"collection {collection}. This probably means that one or more datasets with the same "
                    "dataset type and data ID already exist in the collection, but it may also indicate "
                    "that the datasets do not exist."
                ) from err

    @transactional
    def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        if collectionRecord.type is not CollectionType.TAGGED:
            raise CollectionTypeError(
                f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED."
            )
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            storage.disassociate(collectionRecord, refsForType)
            if self._managers.obscore:
                self._managers.obscore.disassociate(refsForType, collectionRecord)

    @transactional
    def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            storage.certify(
                collectionRecord,
                refsForType,
                timespan,
                context=queries.SqlQueryContext(self._db, self._managers.column_types),
            )

    @transactional
    def decertify(
        self,
        collection: str,
        datasetType: Union[str, DatasetType],
        timespan: Timespan,
        *,
        dataIds: Optional[Iterable[DataId]] = None,
    ) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        collectionRecord = self._managers.collections.find(collection)
        if isinstance(datasetType, str):
            storage = self._managers.datasets[datasetType]
        else:
            storage = self._managers.datasets[datasetType.name]
        standardizedDataIds = None
        if dataIds is not None:
            standardizedDataIds = [
                DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds
            ]
        storage.decertify(
            collectionRecord,
            timespan,
            dataIds=standardizedDataIds,
            context=queries.SqlQueryContext(self._db, self._managers.column_types),
        )

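    # A calibration-collection sketch for certify/decertify above (an
    # illustrative assumption, not part of the original module; the collection
    # name, dataset type, and refs are made up).
    #
    #     from lsst.daf.butler import Timespan
    #
    #     registry.registerCollection("DummyCam/calib", CollectionType.CALIBRATION)
    #     validity = Timespan(begin=None, end=None)  # unbounded validity range
    #     registry.certify("DummyCam/calib", bias_refs, validity)
    #     registry.decertify("DummyCam/calib", "bias", validity)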

    def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
        """Return an object that allows a new `Datastore` instance to
        communicate with this `Registry`.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            Object that mediates communication between this `Registry` and its
            associated datastores.
        """
        return self._managers.datastores

    def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datastores.findDatastores(ref)

    def expandDataId(
        self,
        dataId: Optional[DataId] = None,
        *,
        graph: Optional[DimensionGraph] = None,
        records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
        withDefaults: bool = True,
        **kwargs: Any,
    ) -> DataCoordinate:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if not withDefaults:
            defaults = None
        else:
            defaults = self.defaults.dataId
        try:
            standardized = DataCoordinate.standardize(
                dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs
            )
        except KeyError as exc:
            # This means either kwargs have some odd name or required
            # dimension is missing.
            raise DimensionNameError(str(exc)) from exc
        if standardized.hasRecords():
            return standardized
        if records is None:
            records = {}
        elif isinstance(records, NamedKeyMapping):
            records = records.byName()
        else:
            records = dict(records)
        if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
            records.update(dataId.records.byName())
        keys = standardized.byName()
        context = queries.SqlQueryContext(self._db, self._managers.column_types)
        for element in standardized.graph.primaryKeyTraversalOrder:
            record = records.get(element.name, ...)  # Use ... to mean not found; None might mean NULL
            if record is ...:
                if isinstance(element, Dimension) and keys.get(element.name) is None:
                    if element in standardized.graph.required:
                        raise DimensionNameError(
                            f"No value or null value for required dimension {element.name}."
                        )
                    keys[element.name] = None
                    record = None
                else:
                    storage = self._managers.dimensions[element]
                    record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context)
                records[element.name] = record
            if record is not None:
                for d in element.implied:
                    value = getattr(record, d.name)
                    if keys.setdefault(d.name, value) != value:
                        raise InconsistentDataIdError(
                            f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
                            f"but {element.name} implies {d.name}={value!r}."
                        )
            else:
                if element in standardized.graph.required:
                    raise DataIdValueError(
                        f"Could not fetch record for required dimension {element.name} via keys {keys}."
                    )
                if element.alwaysJoin:
                    raise InconsistentDataIdError(
                        f"Could not fetch record for element {element.name} via keys {keys}, "
                        "but it is marked alwaysJoin=True; this means one or more dimensions are not "
                        "related."
                    )
                for d in element.implied:
                    keys.setdefault(d.name, None)
                    records.setdefault(d.name, None)
        return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records)

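    # A small sketch of data ID expansion with expandDataId above (an
    # illustrative assumption, not part of the original module; the instrument
    # and detector values are made up).
    #
    #     data_id = registry.expandDataId(instrument="DummyCam", detector=0)
    #     print(data_id.records["detector"])  # full detector dimension record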

    def insertDimensionData(
        self,
        element: Union[DimensionElement, str],
        *data: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if conform:
            if isinstance(element, str):
                element = self.dimensions[element]
            records = [
                row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data
            ]
        else:
            # Ignore typing since caller said to trust them with conform=False.
            records = data  # type: ignore
        storage = self._managers.dimensions[element]
        storage.insert(*records, replace=replace, skip_existing=skip_existing)

    def syncDimensionData(
        self,
        element: Union[DimensionElement, str],
        row: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        update: bool = False,
    ) -> Union[bool, Dict[str, Any]]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if conform:
            if isinstance(element, str):
                element = self.dimensions[element]
            record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row)
        else:
            # Ignore typing since caller said to trust them with conform=False.
            record = row  # type: ignore
        storage = self._managers.dimensions[element]
        return storage.sync(record, update=update)

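    # A dimension-record sketch for the two methods above (an illustrative
    # assumption, not part of the original module; the record values are made
    # up).
    #
    #     registry.insertDimensionData(
    #         "detector", {"instrument": "DummyCam", "id": 0, "full_name": "D0"}
    #     )
    #     inserted = registry.syncDimensionData(
    #         "detector", {"instrument": "DummyCam", "id": 0, "full_name": "D0"}
    #     )  # returns False because an identical row already exists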

    def queryDatasetTypes(
        self,
        expression: Any = ...,
        *,
        components: Optional[bool] = None,
        missing: Optional[List[str]] = None,
    ) -> Iterable[DatasetType]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        wildcard = DatasetTypeWildcard.from_expression(expression)
        composition_dict = self._managers.datasets.resolve_wildcard(
            wildcard,
            components=components,
            missing=missing,
        )
        result: list[DatasetType] = []
        for parent_dataset_type, components_for_parent in composition_dict.items():
            result.extend(
                parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type
                for c in components_for_parent
            )
        return result

    def queryCollections(
        self,
        expression: Any = ...,
        datasetType: Optional[DatasetType] = None,
        collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
        includeChains: Optional[bool] = None,
    ) -> Sequence[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry

        # Right now the datasetTypes argument is completely ignored, but that
        # is consistent with its [lack of] guarantees. DM-24939 or a follow-up
        # ticket will take care of that.
        try:
            wildcard = CollectionWildcard.from_expression(expression)
        except TypeError as exc:
            raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc
        collectionTypes = ensure_iterable(collectionTypes)
        return [
            record.name
            for record in self._managers.collections.resolve_wildcard(
                wildcard,
                collection_types=frozenset(collectionTypes),
                flatten_chains=flattenChains,
                include_chains=includeChains,
            )
        ]

    def _makeQueryBuilder(
        self,
        summary: queries.QuerySummary,
        doomed_by: Iterable[str] = (),
    ) -> queries.QueryBuilder:
        """Return a `QueryBuilder` instance capable of constructing and
        managing more complex queries than those obtainable via `Registry`
        interfaces.

        This is an advanced interface; downstream code should prefer
        `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
        are sufficient.

        Parameters
        ----------
        summary : `queries.QuerySummary`
            Object describing and categorizing the full set of dimensions that
            will be included in the query.
        doomed_by : `Iterable` of `str`, optional
            A list of diagnostic messages that indicate why the query is going
            to yield no results and should not even be executed. If an empty
            container (default) the query will be executed unless other code
            determines that it is doomed.

        Returns
        -------
        builder : `queries.QueryBuilder`
            Object that can be used to construct and perform advanced queries.
        """
        doomed_by = list(doomed_by)
        backend = queries.SqlQueryBackend(self._db, self._managers)
        context = backend.context()
        relation: Relation | None = None
        if doomed_by:
            relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by)
        return queries.QueryBuilder(
            summary,
            backend=backend,
            context=context,
            relation=relation,
        )

    def _standardize_query_data_id_args(
        self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any
    ) -> DataCoordinate:
        """Preprocess the data ID arguments passed to query* methods.

        Parameters
        ----------
        data_id : `DataId` or `None`
            Data ID that constrains the query results.
        doomed_by : `list` [ `str` ]
            List to append messages indicating why the query is doomed to
            yield no results.
        **kwargs
            Additional data ID key-value pairs, extending and overriding
            ``data_id``.

        Returns
        -------
        data_id : `DataCoordinate`
            Standardized data ID. Will be fully expanded unless expansion
            fails, in which case a message will be appended to ``doomed_by``
            on return.
        """
        try:
            return self.expandDataId(data_id, **kwargs)
        except DataIdValueError as err:
            doomed_by.append(str(err))
            return DataCoordinate.standardize(
                data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId
            )

    def _standardize_query_dataset_args(
        self,
        datasets: Any,
        collections: CollectionArgType | None,
        components: bool | None,
        mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain",
        *,
        doomed_by: list[str],
    ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]:
        """Preprocess dataset arguments passed to query* methods.

        Parameters
        ----------
        datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these
            Expression identifying dataset types. See `queryDatasetTypes` for
            details.
        collections : `str`, `re.Pattern`, or iterable of these
            Expression identifying collections to be searched. See
            `queryCollections` for details.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components.
            If `None` (default), apply patterns to components only if their
            parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

            Values other than `False` are deprecated, and only `False` will be
            supported after v26. After v27 this argument will be removed
            entirely.
        mode : `str`, optional
            The way in which datasets are being used in this query; one of:

            - "find_first": this is a query for the first dataset in an
              ordered list of collections. Prohibits collection wildcards,
              but permits dataset type wildcards.

            - "find_all": this is a query for all datasets in all matched
              collections. Permits collection and dataset type wildcards.

            - "constrain": this is a query for something other than datasets,
              with results constrained by dataset existence. Permits
              collection wildcards and prohibits ``...`` as a dataset type
              wildcard.
        doomed_by : `list` [ `str` ]
            List to append messages indicating why the query is doomed to
            yield no results.

        Returns
        -------
        composition : `defaultdict` [ `DatasetType`, `list` [ `str` ] ]
            Dictionary mapping parent dataset type to `list` of components
            matched for that dataset type (or `None` for the parent itself).
        collections : `CollectionWildcard`
            Processed collection expression.
        """
        composition: dict[DatasetType, list[str | None]] = {}
        collection_wildcard: CollectionWildcard | None = None
        if datasets is not None:
            if collections is None:
                if not self.defaults.collections:
                    raise NoDefaultCollectionError("No collections, and no registry default collections.")
                collection_wildcard = CollectionWildcard.from_expression(self.defaults.collections)
            else:
                collection_wildcard = CollectionWildcard.from_expression(collections)
                if mode == "find_first" and collection_wildcard.patterns:
                    raise TypeError(
                        f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context."
                    )
            missing: list[str] = []
            composition = self._managers.datasets.resolve_wildcard(
                datasets, components=components, missing=missing, explicit_only=(mode == "constrain")
            )
            if missing and mode == "constrain":
                # After v26 this should raise MissingDatasetTypeError, to be
                # implemented on DM-36303.
                warnings.warn(
                    f"Dataset type(s) {missing} are not registered; this will be an error after v26.",
                    FutureWarning,
                )
            doomed_by.extend(f"Dataset type {name} is not registered." for name in missing)
        elif collections:
            # I think this check should actually be `collections is not None`,
            # but it looks like some CLI scripts use empty tuple as default.
            raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.")
        return composition, collection_wildcard

    def queryDatasets(
        self,
        datasetType: Any,
        *,
        collections: CollectionArgType | None = None,
        dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
        dataId: Optional[DataId] = None,
        where: str = "",
        findFirst: bool = False,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DatasetQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        doomed_by: list[str] = []
        data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
        dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
            datasetType,
            collections,
            components,
            mode="find_first" if findFirst else "find_all",
            doomed_by=doomed_by,
        )
        if collection_wildcard is not None and collection_wildcard.empty():
            doomed_by.append("No datasets can be found because collection list is empty.")
            return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
        parent_results: list[queries.ParentDatasetQueryResults] = []
        for parent_dataset_type, components_for_parent in dataset_composition.items():
            # The full set of dimensions in the query is the combination of
            # those needed for the DatasetType and those explicitly requested,
            # if any.
            dimension_names = set(parent_dataset_type.dimensions.names)
            if dimensions is not None:
                dimension_names.update(self.dimensions.extract(dimensions).names)
            # Construct the summary structure needed to construct a
            # QueryBuilder.
            summary = queries.QuerySummary(
                requested=DimensionGraph(self.dimensions, names=dimension_names),
                column_types=self._managers.column_types,
                data_id=data_id,
                expression=where,
                bind=bind,
                defaults=self.defaults.dataId,
                check=check,
                datasets=[parent_dataset_type],
            )
            builder = self._makeQueryBuilder(summary)
            # Add the dataset subquery to the query, telling the QueryBuilder
            # to include the rank of the selected collection in the results
            # only if we need to findFirst. Note that if any of the
            # collections are actually wildcard expressions, and
            # findFirst=True, this will raise TypeError for us.
            builder.joinDataset(parent_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst)
            query = builder.finish()
            parent_results.append(
                queries.ParentDatasetQueryResults(
                    query, parent_dataset_type, components=components_for_parent
                )
            )
        if not parent_results:
            doomed_by.extend(
                f"No registered dataset type matching {t!r} found, so no matching datasets can "
                "exist in any collection."
                for t in ensure_iterable(datasetType)
            )
            return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
        elif len(parent_results) == 1:
            return parent_results[0]
        else:
            return queries.ChainedDatasetQueryResults(parent_results)

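    # A query sketch for queryDatasets above (an illustrative assumption, not
    # part of the original module; the dataset type, collections, and where
    # clause are made up).
    #
    #     refs = registry.queryDatasets(
    #         "calexp",
    #         collections=["u/someone/run1", "u/someone/run2"],
    #         where="instrument = 'DummyCam' AND visit > 40",
    #         findFirst=True,
    #     )
    #     for ref in refs:
    #         print(ref.dataId, ref.run)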

    def queryDataIds(
        self,
        dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str],
        *,
        dataId: Optional[DataId] = None,
        datasets: Any = None,
        collections: CollectionArgType | None = None,
        where: str = "",
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DataCoordinateQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        dimensions = ensure_iterable(dimensions)
        requestedDimensions = self.dimensions.extract(dimensions)
        doomed_by: list[str] = []
        data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
        dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
            datasets, collections, components, doomed_by=doomed_by
        )
        if collection_wildcard is not None and collection_wildcard.empty():
            doomed_by.append("No data coordinates can be found because collection list is empty.")
        summary = queries.QuerySummary(
            requested=requestedDimensions,
            column_types=self._managers.column_types,
            data_id=data_id,
            expression=where,
            bind=bind,
            defaults=self.defaults.dataId,
            check=check,
            datasets=dataset_composition.keys(),
        )
        builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
        for datasetType in dataset_composition.keys():
            builder.joinDataset(datasetType, collection_wildcard, isResult=False)
        query = builder.finish()

        return queries.DataCoordinateQueryResults(query)

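    # A data-ID query sketch for queryDataIds above (an illustrative
    # assumption, not part of the original module; dataset type, collection,
    # and instrument are made up).
    #
    #     data_ids = registry.queryDataIds(
    #         ["visit", "detector"],
    #         datasets="raw",
    #         collections="DummyCam/raw/all",
    #         instrument="DummyCam",
    #     )
    #     print(data_ids.count())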

    def queryDimensionRecords(
        self,
        element: Union[DimensionElement, str],
        *,
        dataId: Optional[DataId] = None,
        datasets: Any = None,
        collections: CollectionArgType | None = None,
        where: str = "",
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DimensionRecordQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if not isinstance(element, DimensionElement):
            try:
                element = self.dimensions[element]
            except KeyError as e:
                raise DimensionNameError(
                    f"No such dimension '{element}', available dimensions: "
                    + str(self.dimensions.getStaticElements())
                ) from e
        doomed_by: list[str] = []
        data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
        dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
            datasets, collections, components, doomed_by=doomed_by
        )
        if collection_wildcard is not None and collection_wildcard.empty():
            doomed_by.append("No dimension records can be found because collection list is empty.")
        summary = queries.QuerySummary(
            requested=element.graph,
            column_types=self._managers.column_types,
            data_id=data_id,
            expression=where,
            bind=bind,
            defaults=self.defaults.dataId,
            check=check,
            datasets=dataset_composition.keys(),
        )
        builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
        for datasetType in dataset_composition.keys():
            builder.joinDataset(datasetType, collection_wildcard, isResult=False)
        query = builder.finish().with_record_columns(element)
        return queries.DatabaseDimensionRecordQueryResults(query, element)

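    # A dimension-record query sketch for queryDimensionRecords above (an
    # illustrative assumption, not part of the original module; the element,
    # where clause, and instrument are made up).
    #
    #     records = registry.queryDimensionRecords(
    #         "exposure",
    #         where="exposure.observation_type = 'science'",
    #         instrument="DummyCam",
    #     )
    #     for record in records:
    #         print(record.id, record.timespan)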

    def queryDatasetAssociations(
        self,
        datasetType: Union[str, DatasetType],
        collections: CollectionArgType | None = Ellipsis,
        *,
        collectionTypes: Iterable[CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
    ) -> Iterator[DatasetAssociation]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if collections is None:
            if not self.defaults.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to queryDatasetAssociations, "
                    "and no defaults from registry construction."
                )
            collections = self.defaults.collections
        collection_wildcard = CollectionWildcard.from_expression(collections)
        backend = queries.SqlQueryBackend(self._db, self._managers)
        parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False)
        timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan")
        collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
        for parent_collection_record in backend.resolve_collection_wildcard(
            collection_wildcard,
            collection_types=frozenset(collectionTypes),
            flatten_chains=flattenChains,
        ):
            # Resolve this possibly-chained collection into a list of
            # non-CHAINED collections that actually hold datasets of this
            # type.
            candidate_collection_records = backend.resolve_dataset_collections(
                parent_dataset_type,
                CollectionWildcard.from_names([parent_collection_record.name]),
                allow_calibration_collections=True,
                governor_constraints={},
            )
            if not candidate_collection_records:
                continue
            with backend.context() as context:
                relation = backend.make_dataset_query_relation(
                    parent_dataset_type,
                    candidate_collection_records,
                    columns={"dataset_id", "run", "timespan", "collection"},
                    context=context,
                )
                reader = queries.DatasetRefReader(
                    parent_dataset_type,
                    translate_collection=lambda k: self._managers.collections[k].name,
                    full=False,
                )
                for row in context.fetch_iterable(relation):
                    ref = reader.read(row)
                    collection_record = self._managers.collections[row[collection_tag]]
                    if collection_record.type is CollectionType.CALIBRATION:
                        timespan = row[timespan_tag]
                    else:
                        # For backwards compatibility and (possibly?) user
                        # convenience we continue to define the timespan of a
                        # DatasetAssociation row for a non-CALIBRATION
                        # collection to be None rather than a fully unbounded
                        # timespan.
                        timespan = None
                    yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan)

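    # An association-listing sketch for queryDatasetAssociations above (an
    # illustrative assumption, not part of the original module; the dataset
    # type is made up).
    #
    #     for assoc in registry.queryDatasetAssociations(
    #         "bias", collections=..., collectionTypes={CollectionType.CALIBRATION}
    #     ):
    #         print(assoc.collection, assoc.ref.dataId, assoc.timespan)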

    @property
    def obsCoreTableManager(self) -> ObsCoreTableManager | None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.obscore

    storageClasses: StorageClassFactory
    """All storage classes known to the registry (`StorageClassFactory`).
    """