Coverage for python/lsst/daf/butler/registries/sql.py: 16%

515 statements  

coverage.py v7.3.1, created at 2023-10-02 08:00 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("SqlRegistry",)

import contextlib
import logging
import warnings
from collections.abc import Iterable, Iterator, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Literal, cast

import sqlalchemy
from lsst.daf.relation import LeafRelation, Relation
from lsst.resources import ResourcePathExpression
from lsst.utils.introspection import find_outside_stacklevel
from lsst.utils.iteration import ensure_iterable

from ..core import (
    Config,
    DataCoordinate,
    DataId,
    DatasetAssociation,
    DatasetColumnTag,
    DatasetId,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionConfig,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    NamedKeyMapping,
    NameLookupMapping,
    Progress,
    StorageClassFactory,
    Timespan,
    ddl,
)
from ..core.utils import transactional
from ..registry import (
    ArgumentError,
    CollectionExpressionError,
    CollectionSummary,
    CollectionType,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    DimensionNameError,
    InconsistentDataIdError,
    NoDefaultCollectionError,
    OrphanedRecordError,
    RegistryConfig,
    RegistryConsistencyError,
    RegistryDefaults,
    _ButlerRegistry,
    queries,
)
from ..registry.interfaces import ChainedCollectionRecord, RunRecord
from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes
from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard

if TYPE_CHECKING:
    from .._butlerConfig import ButlerConfig
    from ..registry._registry import CollectionArgType
    from ..registry.interfaces import (
        CollectionRecord,
        Database,
        DatastoreRegistryBridgeManager,
        ObsCoreTableManager,
    )


_LOG = logging.getLogger(__name__)


class SqlRegistry(_ButlerRegistry):
    """Registry implementation based on SQLAlchemy.

    Parameters
    ----------
    database : `Database`
        Database instance to store Registry.
    defaults : `RegistryDefaults`
        Default collection search path and/or output `~CollectionType.RUN`
        collection.
    managers : `RegistryManagerInstances`
        All the managers required for this registry.
    """

    defaultConfigFile: str | None = None
    """Path to configuration defaults. Accessed within the ``configs``
    resource or relative to a search path. Can be None if no defaults are
    specified.
    """

    @classmethod
    def createFromConfig(
        cls,
        config: RegistryConfig | str | None = None,
        dimensionConfig: DimensionConfig | str | None = None,
        butlerRoot: ResourcePathExpression | None = None,
    ) -> _ButlerRegistry:
        """Create registry database and return `SqlRegistry` instance.

        This method initializes database contents; the database must be empty
        prior to calling this method.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration. If missing, the default configuration
            will be loaded from registry.yaml.
        dimensionConfig : `DimensionConfig` or `str`, optional
            Dimensions configuration. If missing, the default configuration
            will be loaded from dimensions.yaml.
        butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
            Path to the repository root this `SqlRegistry` will manage.

        Returns
        -------
        registry : `SqlRegistry`
            A new `SqlRegistry` instance.
        """
        config = cls.forceRegistryConfig(config)
        config.replaceRoot(butlerRoot)

        if isinstance(dimensionConfig, str):
            dimensionConfig = DimensionConfig(dimensionConfig)
        elif dimensionConfig is None:
            dimensionConfig = DimensionConfig()
        elif not isinstance(dimensionConfig, DimensionConfig):
            raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}")

        DatabaseClass = config.getDatabaseClass()
        database = DatabaseClass.fromUri(
            config.connectionString, origin=config.get("origin", 0), namespace=config.get("namespace")
        )
        managerTypes = RegistryManagerTypes.fromConfig(config)
        managers = managerTypes.makeRepo(database, dimensionConfig)
        return cls(database, RegistryDefaults(), managers)
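
    # Illustrative sketch (not part of the original file): creating a
    # brand-new SQLite-backed registry. The file path and config keys below
    # are hypothetical; any RegistryConfig with a valid connection string
    # should work the same way.
    #
    #     config = RegistryConfig()
    #     config["db"] = "sqlite:///some/path/gen3.sqlite3"
    #     registry = SqlRegistry.createFromConfig(config)
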

    @classmethod
    def fromConfig(
        cls,
        config: ButlerConfig | RegistryConfig | Config | str,
        butlerRoot: ResourcePathExpression | None = None,
        writeable: bool = True,
        defaults: RegistryDefaults | None = None,
    ) -> _ButlerRegistry:
        """Create `Registry` subclass instance from `config`.

        The registry database must be initialized prior to calling this
        method.

        Parameters
        ----------
        config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
            Registry configuration.
        butlerRoot : `lsst.resources.ResourcePathExpression`, optional
            Path to the repository root this `Registry` will manage.
        writeable : `bool`, optional
            If `True` (default) create a read-write connection to the
            database.
        defaults : `RegistryDefaults`, optional
            Default collection search path and/or output `~CollectionType.RUN`
            collection.

        Returns
        -------
        registry : `SqlRegistry` (subclass)
            A new `SqlRegistry` subclass instance.
        """
        config = cls.forceRegistryConfig(config)
        config.replaceRoot(butlerRoot)
        DatabaseClass = config.getDatabaseClass()
        database = DatabaseClass.fromUri(
            config.connectionString,
            origin=config.get("origin", 0),
            namespace=config.get("namespace"),
            writeable=writeable,
        )
        managerTypes = RegistryManagerTypes.fromConfig(config)
        with database.session():
            managers = managerTypes.loadRepo(database)
        if defaults is None:
            defaults = RegistryDefaults()
        return cls(database, defaults, managers)
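
    # Illustrative sketch (not part of the original file): opening an
    # existing repository read-only with a default collection search path.
    # The path and collection name are hypothetical.
    #
    #     defaults = RegistryDefaults(collections=["HSC/defaults"], run=None)
    #     registry = SqlRegistry.fromConfig(
    #         "/repo/butler.yaml", writeable=False, defaults=defaults
    #     )
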

    def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances):
        self._db = database
        self._managers = managers
        self.storageClasses = StorageClassFactory()
        # Intentionally invoke property setter to initialize defaults. This
        # can only be done after most of the rest of Registry has already been
        # initialized, and must be done before the property getter is used.
        self.defaults = defaults

    def __str__(self) -> str:
        return str(self._db)

    def __repr__(self) -> str:
        return f"SqlRegistry({self._db!r}, {self.dimensions!r})"

    def isWriteable(self) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._db.isWriteable()

    def copy(self, defaults: RegistryDefaults | None = None) -> _ButlerRegistry:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if defaults is None:
            # No need to copy, because `RegistryDefaults` is immutable; we
            # effectively copy on write.
            defaults = self.defaults
        return type(self)(self._db, defaults, self._managers)

    @property
    def dimensions(self) -> DimensionUniverse:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.dimensions.universe

    def refresh(self) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        with self._db.transaction():
            self._managers.refresh()

    @contextlib.contextmanager
    def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        try:
            with self._db.transaction(savepoint=savepoint):
                yield
        except BaseException:
            # TODO: this clears the caches sometimes when we wouldn't actually
            # need to. Can we avoid that?
            self._managers.dimensions.clearCaches()
            raise
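
    # Illustrative sketch (not part of the original file): grouping several
    # registry operations so they commit or roll back together. The collection
    # name and record values below are hypothetical.
    #
    #     with registry.transaction():
    #         registry.registerRun("u/someone/run")
    #         registry.insertDimensionData("instrument", {"name": "DummyCam"})
    #
    # An exception raised inside the block rolls back both operations and
    # clears the dimension-record caches before re-raising.
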

    def resetConnectionPool(self) -> None:
        """Reset the SQLAlchemy connection pool for the `SqlRegistry`
        database.

        This operation is useful when using the registry with fork-based
        multiprocessing. To use the registry across a fork boundary, one has
        to make sure that there are no currently active connections (no
        session or transaction is in progress) and reset the connection pool
        using this method. This method should be called by the child process
        immediately after the fork.
        """
        self._db._engine.dispose()
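
    # Illustrative sketch (not part of the original file): resetting the pool
    # in a forked worker before issuing any queries. The worker function is
    # hypothetical.
    #
    #     import multiprocessing
    #
    #     def worker(registry: SqlRegistry) -> None:
    #         registry.resetConnectionPool()  # must run before any queries
    #         print(list(registry.queryCollections()))
    #
    #     ctx = multiprocessing.get_context("fork")
    #     ctx.Process(target=worker, args=(registry,)).start()
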

    def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
        """Add an opaque (to the `Registry`) table for use by a `Datastore` or
        other data repository client.

        Opaque table records can be added via `insertOpaqueData`, retrieved
        via `fetchOpaqueData`, and removed via `deleteOpaqueData`.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. This may differ from the
            actual name used in the database by a prefix and/or suffix.
        spec : `ddl.TableSpec`
            Specification for the table to be added.
        """
        self._managers.opaque.register(tableName, spec)

    @transactional
    def insertOpaqueData(self, tableName: str, *data: dict) -> None:
        """Insert records into an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        data
            Each additional positional argument is a dictionary that
            represents a single row to be added.
        """
        self._managers.opaque[tableName].insert(*data)

    def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]:
        """Retrieve records from an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        where
            Additional keyword arguments are interpreted as equality
            constraints that restrict the returned rows (combined with AND);
            keyword arguments are column names and values are the values they
            must have.

        Yields
        ------
        row : `dict`
            A dictionary representing a single result row.
        """
        yield from self._managers.opaque[tableName].fetch(**where)

    @transactional
    def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
        """Remove records from an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        where
            Additional keyword arguments are interpreted as equality
            constraints that restrict the deleted rows (combined with AND);
            keyword arguments are column names and values are the values they
            must have.
        """
        self._managers.opaque[tableName].delete(where.keys(), where)
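
    # Illustrative sketch (not part of the original file): registering a
    # small opaque table and round-tripping a record through it. The table
    # name and column layout are hypothetical.
    #
    #     spec = ddl.TableSpec(
    #         fields=[
    #             ddl.FieldSpec(name="id", dtype=sqlalchemy.BigInteger, primaryKey=True),
    #             ddl.FieldSpec(name="path", dtype=sqlalchemy.String, length=256),
    #         ]
    #     )
    #     registry.registerOpaqueTable("demo_records", spec)
    #     registry.insertOpaqueData("demo_records", {"id": 1, "path": "a/b.fits"})
    #     rows = list(registry.fetchOpaqueData("demo_records", id=1))
    #     registry.deleteOpaqueData("demo_records", id=1)
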

    def registerCollection(
        self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None
    ) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, registered = self._managers.collections.register(name, type, doc=doc)
        return registered

    def getCollectionType(self, name: str) -> CollectionType:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.find(name).type

    def _get_collection_record(self, name: str) -> CollectionRecord:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.find(name)

    def registerRun(self, name: str, doc: str | None = None) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc)
        return registered

    @transactional
    def removeCollection(self, name: str) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.collections.remove(name)

    def getCollectionChain(self, parent: str) -> tuple[str, ...]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(parent)
        if record.type is not CollectionType.CHAINED:
            raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
        assert isinstance(record, ChainedCollectionRecord)
        return record.children

    @transactional
    def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(parent)
        if record.type is not CollectionType.CHAINED:
            raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
        assert isinstance(record, ChainedCollectionRecord)
        children = CollectionWildcard.from_expression(children).require_ordered()
        if children != record.children or flatten:
            record.update(self._managers.collections, children, flatten=flatten)
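
    # Illustrative sketch (not part of the original file): building a CHAINED
    # collection that searches two runs in order. Collection names are
    # hypothetical.
    #
    #     registry.registerRun("u/someone/run1")
    #     registry.registerRun("u/someone/run2")
    #     registry.registerCollection("u/someone/chain", CollectionType.CHAINED)
    #     registry.setCollectionChain("u/someone/chain", ["u/someone/run1", "u/someone/run2"])
    #     assert registry.getCollectionChain("u/someone/chain") == (
    #         "u/someone/run1",
    #         "u/someone/run2",
    #     )
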

    def getCollectionParentChains(self, collection: str) -> set[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return {
            record.name
            for record in self._managers.collections.getParentChains(
                self._managers.collections.find(collection).key
            )
        }

    def getCollectionDocumentation(self, collection: str) -> str | None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key)

    def setCollectionDocumentation(self, collection: str, doc: str | None) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc)

    def getCollectionSummary(self, collection: str) -> CollectionSummary:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(collection)
        return self._managers.datasets.getCollectionSummary(record)

    def registerDatasetType(self, datasetType: DatasetType) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, inserted = self._managers.datasets.register(datasetType)
        return inserted

    def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry

        for datasetTypeExpression in ensure_iterable(name):
            # Catch any warnings from the caller specifying a component
            # dataset type. This will result in an error later but the
            # warning could be confusing when the caller is not querying
            # anything.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=FutureWarning)
                datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression))
            if not datasetTypes:
                _LOG.info("Dataset type %r not defined", datasetTypeExpression)
            else:
                for datasetType in datasetTypes:
                    self._managers.datasets.remove(datasetType.name)
                    _LOG.info("Removed dataset type %r", datasetType.name)

    def getDatasetType(self, name: str) -> DatasetType:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        parent_name, component = DatasetType.splitDatasetTypeName(name)
        storage = self._managers.datasets[parent_name]
        if component is None:
            return storage.datasetType
        else:
            return storage.datasetType.makeComponentDatasetType(component)

    def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datasets.supportsIdGenerationMode(mode)
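
    # Illustrative sketch (not part of the original file): registering a
    # dataset type and reading it back. The name, dimensions, and storage
    # class are hypothetical.
    #
    #     datasetType = DatasetType(
    #         "calexp",
    #         dimensions=["instrument", "visit", "detector"],
    #         storageClass="ExposureF",
    #         universe=registry.dimensions,
    #     )
    #     registry.registerDatasetType(datasetType)
    #     assert registry.getDatasetType("calexp") == datasetType
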

    def findDataset(
        self,
        datasetType: DatasetType | str,
        dataId: DataId | None = None,
        *,
        collections: CollectionArgType | None = None,
        timespan: Timespan | None = None,
        **kwargs: Any,
    ) -> DatasetRef | None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if collections is None:
            if not self.defaults.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to findDataset, and no defaults from registry construction."
                )
            collections = self.defaults.collections
        backend = queries.SqlQueryBackend(self._db, self._managers)
        collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True)
        if collection_wildcard.empty():
            return None
        matched_collections = backend.resolve_collection_wildcard(collection_wildcard)
        parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard(
            datasetType, components_deprecated=False
        )
        if len(components) > 1:
            raise DatasetTypeError(
                f"findDataset requires exactly one dataset type; got multiple components {components} "
                f"for parent dataset type {parent_dataset_type.name}."
            )
        component = components[0]
        dataId = DataCoordinate.standardize(
            dataId,
            graph=parent_dataset_type.dimensions,
            universe=self.dimensions,
            defaults=self.defaults.dataId,
            **kwargs,
        )
        governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names}
        (filtered_collections,) = backend.filter_dataset_collections(
            [parent_dataset_type],
            matched_collections,
            governor_constraints=governor_constraints,
        ).values()
        if not filtered_collections:
            return None
        if timespan is None:
            filtered_collections = [
                collection_record
                for collection_record in filtered_collections
                if collection_record.type is not CollectionType.CALIBRATION
            ]
        if filtered_collections:
            requested_columns = {"dataset_id", "run", "collection"}
            with backend.context() as context:
                predicate = context.make_data_coordinate_predicate(
                    dataId.subset(parent_dataset_type.dimensions), full=False
                )
                if timespan is not None:
                    requested_columns.add("timespan")
                    predicate = predicate.logical_and(
                        context.make_timespan_overlap_predicate(
                            DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan
                        )
                    )
                relation = backend.make_dataset_query_relation(
                    parent_dataset_type, filtered_collections, requested_columns, context
                ).with_rows_satisfying(predicate)
                rows = list(context.fetch_iterable(relation))
        else:
            rows = []
        if not rows:
            return None
        elif len(rows) == 1:
            best_row = rows[0]
        else:
            rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)}
            collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
            row_iter = iter(rows)
            best_row = next(row_iter)
            best_rank = rank_by_collection_key[best_row[collection_tag]]
            have_tie = False
            for row in row_iter:
                if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank:
                    best_row = row
                    best_rank = rank
                    have_tie = False
                elif rank == best_rank:
                    have_tie = True
                    assert timespan is not None, "Rank ties should be impossible given DB constraints."
            if have_tie:
                raise LookupError(
                    f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections "
                    f"{collection_wildcard.strings} with timespan {timespan}."
                )
        reader = queries.DatasetRefReader(
            parent_dataset_type,
            translate_collection=lambda k: self._managers.collections[k].name,
        )
        ref = reader.read(best_row, data_id=dataId)
        if component is not None:
            ref = ref.makeComponentRef(component)
        return ref
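
    # Illustrative sketch (not part of the original file): locating a single
    # dataset, including a calibration lookup constrained by a timespan. The
    # dataset type, data ID values, collection name, and the
    # `some_exposure_timespan` variable are hypothetical.
    #
    #     ref = registry.findDataset(
    #         "bias",
    #         instrument="DummyCam",
    #         detector=1,
    #         collections=["DummyCam/calib"],
    #         timespan=some_exposure_timespan,
    #     )
    #     if ref is None:
    #         print("no matching dataset")
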

    @transactional
    def insertDatasets(
        self,
        datasetType: DatasetType | str,
        dataIds: Iterable[DataId],
        run: str | None = None,
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> list[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if isinstance(datasetType, DatasetType):
            storage = self._managers.datasets.find(datasetType.name)
            if storage is None:
                raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
        else:
            storage = self._managers.datasets.find(datasetType)
            if storage is None:
                raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.")
        if run is None:
            if self.defaults.run is None:
                raise NoDefaultCollectionError(
                    "No run provided to insertDatasets, and no default from registry construction."
                )
            run = self.defaults.run
        runRecord = self._managers.collections.find(run)
        if runRecord.type is not CollectionType.RUN:
            raise CollectionTypeError(
                f"Given collection is of type {runRecord.type.name}; RUN collection required."
            )
        assert isinstance(runRecord, RunRecord)
        progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
        if expand:
            expandedDataIds = [
                self.expandDataId(dataId, graph=storage.datasetType.dimensions)
                for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs")
            ]
        else:
            expandedDataIds = [
                DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds
            ]
        try:
            refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode))
            if self._managers.obscore:
                context = queries.SqlQueryContext(self._db, self._managers.column_types)
                self._managers.obscore.add_datasets(refs, context)
        except sqlalchemy.exc.IntegrityError as err:
            raise ConflictingDefinitionError(
                "A database constraint failure was triggered by inserting "
                f"one or more datasets of type {storage.datasetType} into "
                f"collection '{run}'. "
                "This probably means a dataset with the same data ID "
                "and dataset type already exists, but it may also mean a "
                "dimension row is missing."
            ) from err
        return refs
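
    # Illustrative sketch (not part of the original file): inserting two
    # datasets into a RUN collection. Names and data ID values are
    # hypothetical; the dataset type must already be registered.
    #
    #     registry.registerRun("u/someone/run1")
    #     refs = registry.insertDatasets(
    #         "raw",
    #         dataIds=[
    #             {"instrument": "DummyCam", "exposure": 100, "detector": 1},
    #             {"instrument": "DummyCam", "exposure": 100, "detector": 2},
    #         ],
    #         run="u/someone/run1",
    #     )
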

    @transactional
    def _importDatasets(
        self,
        datasets: Iterable[DatasetRef],
        expand: bool = True,
    ) -> list[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        datasets = list(datasets)
        if not datasets:
            # nothing to do
            return []

        # find dataset type
        datasetTypes = {dataset.datasetType for dataset in datasets}
        if len(datasetTypes) != 1:
            raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}")
        datasetType = datasetTypes.pop()

        # get storage handler for this dataset type
        storage = self._managers.datasets.find(datasetType.name)
        if storage is None:
            raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")

        # find run name
        runs = {dataset.run for dataset in datasets}
        if len(runs) != 1:
            raise ValueError(f"Multiple run names in input datasets: {runs}")
        run = runs.pop()

        runRecord = self._managers.collections.find(run)
        if runRecord.type is not CollectionType.RUN:
            raise CollectionTypeError(
                f"Given collection '{runRecord.name}' is of type {runRecord.type.name};"
                " RUN collection required."
            )
        assert isinstance(runRecord, RunRecord)

        progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
        if expand:
            expandedDatasets = [
                dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions))
                for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs")
            ]
        else:
            expandedDatasets = [
                DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True)
                for dataset in datasets
            ]

        try:
            refs = list(storage.import_(runRecord, expandedDatasets))
            if self._managers.obscore:
                context = queries.SqlQueryContext(self._db, self._managers.column_types)
                self._managers.obscore.add_datasets(refs, context)
        except sqlalchemy.exc.IntegrityError as err:
            raise ConflictingDefinitionError(
                "A database constraint failure was triggered by inserting "
                f"one or more datasets of type {storage.datasetType} into "
                f"collection '{run}'. "
                "This probably means a dataset with the same data ID "
                "and dataset type already exists, but it may also mean a "
                "dimension row is missing."
            ) from err
        # Check that imported dataset IDs match the input
        for imported_ref, input_ref in zip(refs, datasets, strict=True):
            if imported_ref.id != input_ref.id:
                raise RegistryConsistencyError(
                    "Imported dataset ID differs from input dataset ID, "
                    f"input ref: {input_ref}, imported ref: {imported_ref}"
                )
        return refs

    def getDataset(self, id: DatasetId) -> DatasetRef | None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datasets.getDatasetRef(id)

    @transactional
    def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG)
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.iter_by_type(refs), desc="Removing datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            try:
                storage.delete(refsForType)
            except sqlalchemy.exc.IntegrityError as err:
                raise OrphanedRecordError(
                    "One or more datasets is still present in one or more Datastores."
                ) from err

    @transactional
    def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        if collectionRecord.type is not CollectionType.TAGGED:
            raise CollectionTypeError(
                f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED."
            )
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.iter_by_type(refs), desc="Associating datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            try:
                storage.associate(collectionRecord, refsForType)
                if self._managers.obscore:
                    # If a TAGGED collection is being monitored by ObsCore
                    # manager then we may need to save the dataset.
                    context = queries.SqlQueryContext(self._db, self._managers.column_types)
                    self._managers.obscore.associate(refsForType, collectionRecord, context)
            except sqlalchemy.exc.IntegrityError as err:
                raise ConflictingDefinitionError(
                    f"Constraint violation while associating dataset of type {datasetType.name} with "
                    f"collection {collection}. This probably means that one or more datasets with the same "
                    "dataset type and data ID already exist in the collection, but it may also indicate "
                    "that the datasets do not exist."
                ) from err

    @transactional
    def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        if collectionRecord.type is not CollectionType.TAGGED:
            raise CollectionTypeError(
                f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED."
            )
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.iter_by_type(refs), desc="Disassociating datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            storage.disassociate(collectionRecord, refsForType)
            if self._managers.obscore:
                self._managers.obscore.disassociate(refsForType, collectionRecord)

    @transactional
    def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.iter_by_type(refs), desc="Certifying datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            storage.certify(
                collectionRecord,
                refsForType,
                timespan,
                context=queries.SqlQueryContext(self._db, self._managers.column_types),
            )

    @transactional
    def decertify(
        self,
        collection: str,
        datasetType: str | DatasetType,
        timespan: Timespan,
        *,
        dataIds: Iterable[DataId] | None = None,
    ) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        collectionRecord = self._managers.collections.find(collection)
        if isinstance(datasetType, str):
            storage = self._managers.datasets[datasetType]
        else:
            storage = self._managers.datasets[datasetType.name]
        standardizedDataIds = None
        if dataIds is not None:
            standardizedDataIds = [
                DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds
            ]
        storage.decertify(
            collectionRecord,
            timespan,
            dataIds=standardizedDataIds,
            context=queries.SqlQueryContext(self._db, self._managers.column_types),
        )
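
    # Illustrative sketch (not part of the original file): certifying bias
    # refs into a CALIBRATION collection for a validity range, then revoking
    # part of that range. The names, `bias_refs`, and the timespan variables
    # are hypothetical.
    #
    #     registry.registerCollection("DummyCam/calib", CollectionType.CALIBRATION)
    #     registry.certify("DummyCam/calib", bias_refs, validity_timespan)
    #     registry.decertify("DummyCam/calib", "bias", bad_timespan)
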

    def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
        """Return an object that allows a new `Datastore` instance to
        communicate with this `Registry`.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            Object that mediates communication between this `Registry` and its
            associated datastores.
        """
        return self._managers.datastores

    def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datastores.findDatastores(ref)

    def expandDataId(
        self,
        dataId: DataId | None = None,
        *,
        graph: DimensionGraph | None = None,
        records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None,
        withDefaults: bool = True,
        **kwargs: Any,
    ) -> DataCoordinate:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if not withDefaults:
            defaults = None
        else:
            defaults = self.defaults.dataId
        try:
            standardized = DataCoordinate.standardize(
                dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs
            )
        except KeyError as exc:
            # This means either kwargs have some odd name or a required
            # dimension is missing.
            raise DimensionNameError(str(exc)) from exc
        if standardized.hasRecords():
            return standardized
        if records is None:
            records = {}
        elif isinstance(records, NamedKeyMapping):
            records = records.byName()
        else:
            records = dict(records)
        if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
            records.update(dataId.records.byName())
        keys = standardized.byName()
        context = queries.SqlQueryContext(self._db, self._managers.column_types)
        for element in standardized.graph.primaryKeyTraversalOrder:
            record = records.get(element.name, ...)  # Use ... to mean not found; None might mean NULL
            if record is ...:
                if isinstance(element, Dimension) and keys.get(element.name) is None:
                    if element in standardized.graph.required:
                        raise DimensionNameError(
                            f"No value or null value for required dimension {element.name}."
                        )
                    keys[element.name] = None
                    record = None
                else:
                    storage = self._managers.dimensions[element]
                    record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context)
                records[element.name] = record
            if record is not None:
                for d in element.implied:
                    value = getattr(record, d.name)
                    if keys.setdefault(d.name, value) != value:
                        raise InconsistentDataIdError(
                            f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
                            f"but {element.name} implies {d.name}={value!r}."
                        )
            else:
                if element in standardized.graph.required:
                    raise DataIdValueError(
                        f"Could not fetch record for required dimension {element.name} via keys {keys}."
                    )
                if element.alwaysJoin:
                    raise InconsistentDataIdError(
                        f"Could not fetch record for element {element.name} via keys {keys}, "
                        "but it is marked alwaysJoin=True; this means one or more dimensions are not "
                        "related."
                    )
                for d in element.implied:
                    keys.setdefault(d.name, None)
                    records.setdefault(d.name, None)
        return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records)
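
    # Illustrative sketch (not part of the original file): expanding a
    # minimal data ID so its dimension records (and implied values) are
    # attached. Values are hypothetical.
    #
    #     data_id = registry.expandDataId(instrument="DummyCam", exposure=100)
    #     assert data_id.hasRecords()
    #     print(data_id.records["exposure"].timespan)
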

    def insertDimensionData(
        self,
        element: DimensionElement | str,
        *data: Mapping[str, Any] | DimensionRecord,
        conform: bool = True,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if conform:
            if isinstance(element, str):
                element = self.dimensions[element]
            records = [
                row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data
            ]
        else:
            # Ignore typing since caller said to trust them with conform=False.
            records = data  # type: ignore
        storage = self._managers.dimensions[element]
        storage.insert(*records, replace=replace, skip_existing=skip_existing)

    def syncDimensionData(
        self,
        element: DimensionElement | str,
        row: Mapping[str, Any] | DimensionRecord,
        conform: bool = True,
        update: bool = False,
    ) -> bool | dict[str, Any]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if conform:
            if isinstance(element, str):
                element = self.dimensions[element]
            record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row)
        else:
            # Ignore typing since caller said to trust them with conform=False.
            record = row  # type: ignore
        storage = self._managers.dimensions[element]
        return storage.sync(record, update=update)
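
    # Illustrative sketch (not part of the original file): inserting
    # dimension rows and idempotently syncing one. Record values are
    # hypothetical.
    #
    #     registry.insertDimensionData("instrument", {"name": "DummyCam", "detector_max": 2})
    #     inserted = registry.syncDimensionData(
    #         "physical_filter",
    #         {"instrument": "DummyCam", "name": "d-r", "band": "r"},
    #     )
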

    def queryDatasetTypes(
        self,
        expression: Any = ...,
        *,
        components: bool | None = False,
        missing: list[str] | None = None,
    ) -> Iterable[DatasetType]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        wildcard = DatasetTypeWildcard.from_expression(expression)
        composition_dict = self._managers.datasets.resolve_wildcard(
            wildcard,
            components=components,
            missing=missing,
        )
        result: list[DatasetType] = []
        for parent_dataset_type, components_for_parent in composition_dict.items():
            result.extend(
                parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type
                for c in components_for_parent
            )
        return result

    def queryCollections(
        self,
        expression: Any = ...,
        datasetType: DatasetType | None = None,
        collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(),
        flattenChains: bool = False,
        includeChains: bool | None = None,
    ) -> Sequence[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry

        # Right now the datasetType argument is completely ignored, but that
        # is consistent with its [lack of] guarantees. DM-24939 or a follow-up
        # ticket will take care of that.
        try:
            wildcard = CollectionWildcard.from_expression(expression)
        except TypeError as exc:
            raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc
        collectionTypes = ensure_iterable(collectionTypes)
        return [
            record.name
            for record in self._managers.collections.resolve_wildcard(
                wildcard,
                collection_types=frozenset(collectionTypes),
                flatten_chains=flattenChains,
                include_chains=includeChains,
            )
        ]
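
    # Illustrative sketch (not part of the original file): wildcard queries
    # over dataset types and collections. The patterns and names are
    # hypothetical.
    #
    #     import re
    #
    #     coadd_types = list(registry.queryDatasetTypes(re.compile(r"deepCoadd.*")))
    #     runs = registry.queryCollections("u/someone/*", collectionTypes=CollectionType.RUN)
    #     everything = registry.queryCollections(..., flattenChains=True)
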

    def _makeQueryBuilder(
        self,
        summary: queries.QuerySummary,
        doomed_by: Iterable[str] = (),
    ) -> queries.QueryBuilder:
        """Return a `QueryBuilder` instance capable of constructing and
        managing more complex queries than those obtainable via `Registry`
        interfaces.

        This is an advanced interface; downstream code should prefer
        `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
        are sufficient.

        Parameters
        ----------
        summary : `queries.QuerySummary`
            Object describing and categorizing the full set of dimensions that
            will be included in the query.
        doomed_by : `~collections.abc.Iterable` of `str`, optional
            A list of diagnostic messages that indicate why the query is going
            to yield no results and should not even be executed. If an empty
            container (default) the query will be executed unless other code
            determines that it is doomed.

        Returns
        -------
        builder : `queries.QueryBuilder`
            Object that can be used to construct and perform advanced queries.
        """
        doomed_by = list(doomed_by)
        backend = queries.SqlQueryBackend(self._db, self._managers)
        context = backend.context()
        relation: Relation | None = None
        if doomed_by:
            relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by)
        return queries.QueryBuilder(
            summary,
            backend=backend,
            context=context,
            relation=relation,
        )

    def _standardize_query_data_id_args(
        self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any
    ) -> DataCoordinate:
        """Preprocess the data ID arguments passed to query* methods.

        Parameters
        ----------
        data_id : `DataId` or `None`
            Data ID that constrains the query results.
        doomed_by : `list` [ `str` ]
            List to append messages indicating why the query is doomed to
            yield no results.
        **kwargs
            Additional data ID key-value pairs, extending and overriding
            ``data_id``.

        Returns
        -------
        data_id : `DataCoordinate`
            Standardized data ID. Will be fully expanded unless expansion
            fails, in which case a message will be appended to ``doomed_by``
            on return.
        """
        try:
            return self.expandDataId(data_id, **kwargs)
        except DataIdValueError as err:
            doomed_by.append(str(err))
            return DataCoordinate.standardize(
                data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId
            )

    def _standardize_query_dataset_args(
        self,
        datasets: Any,
        collections: CollectionArgType | None,
        components: bool | None,
        mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain",
        *,
        doomed_by: list[str],
    ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]:
        """Preprocess dataset arguments passed to query* methods.

        Parameters
        ----------
        datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these
            Expression identifying dataset types. See `queryDatasetTypes` for
            details.
        collections : `str`, `re.Pattern`, or iterable of these
            Expression identifying collections to be searched. See
            `queryCollections` for details.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components.
            If `None` (default), apply patterns to components only if their
            parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

            Values other than `False` are deprecated, and only `False` will be
            supported after v26. After v27 this argument will be removed
            entirely.
        mode : `str`, optional
            The way in which datasets are being used in this query; one of:

            - "find_first": this is a query for the first dataset in an
              ordered list of collections. Prohibits collection wildcards,
              but permits dataset type wildcards.

            - "find_all": this is a query for all datasets in all matched
              collections. Permits collection and dataset type wildcards.

            - "constrain": this is a query for something other than datasets,
              with results constrained by dataset existence. Permits
              collection wildcards and prohibits ``...`` as a dataset type
              wildcard.
        doomed_by : `list` [ `str` ]
            List to append messages indicating why the query is doomed to
            yield no results.

        Returns
        -------
        composition : `dict` [ `DatasetType`, `list` [ `str` or `None` ] ]
            Dictionary mapping parent dataset type to a `list` of the
            components matched for that dataset type (with `None` standing
            in for the parent itself).
        collections : `CollectionWildcard` or `None`
            Processed collection expression.
        """
        composition: dict[DatasetType, list[str | None]] = {}
        collection_wildcard: CollectionWildcard | None = None
        if datasets is not None:
            if collections is None:
                if not self.defaults.collections:
                    raise NoDefaultCollectionError("No collections, and no registry default collections.")
                collection_wildcard = CollectionWildcard.from_expression(self.defaults.collections)
            else:
                collection_wildcard = CollectionWildcard.from_expression(collections)
                if mode == "find_first" and collection_wildcard.patterns:
                    raise TypeError(
                        f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context."
                    )
            missing: list[str] = []
            composition = self._managers.datasets.resolve_wildcard(
                datasets, components=components, missing=missing, explicit_only=(mode == "constrain")
            )
            if missing and mode == "constrain":
                # After v26 this should raise MissingDatasetTypeError, to be
                # implemented on DM-36303.
                warnings.warn(
                    f"Dataset type(s) {missing} are not registered; this will be an error after v26.",
                    FutureWarning,
                    stacklevel=find_outside_stacklevel("lsst.daf.butler"),
                )
            doomed_by.extend(f"Dataset type {name} is not registered." for name in missing)
        elif collections:
            # I think this check should actually be `collections is not None`,
            # but it looks like some CLI scripts use empty tuple as default.
            raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.")
        return composition, collection_wildcard

    def queryDatasets(
        self,
        datasetType: Any,
        *,
        collections: CollectionArgType | None = None,
        dimensions: Iterable[Dimension | str] | None = None,
        dataId: DataId | None = None,
        where: str = "",
        findFirst: bool = False,
        components: bool | None = False,
        bind: Mapping[str, Any] | None = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DatasetQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        doomed_by: list[str] = []
        data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
        dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
            datasetType,
            collections,
            components,
            mode="find_first" if findFirst else "find_all",
            doomed_by=doomed_by,
        )
        if collection_wildcard is not None and collection_wildcard.empty():
            doomed_by.append("No datasets can be found because collection list is empty.")
            return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
        parent_results: list[queries.ParentDatasetQueryResults] = []
        for parent_dataset_type, components_for_parent in dataset_composition.items():
            # The full set of dimensions in the query is the combination of
            # those needed for the DatasetType and those explicitly requested,
            # if any.
            dimension_names = set(parent_dataset_type.dimensions.names)
            if dimensions is not None:
                dimension_names.update(self.dimensions.extract(dimensions).names)
            # Construct the summary structure needed to construct a
            # QueryBuilder.
            summary = queries.QuerySummary(
                requested=DimensionGraph(self.dimensions, names=dimension_names),
                column_types=self._managers.column_types,
                data_id=data_id,
                expression=where,
                bind=bind,
                defaults=self.defaults.dataId,
                check=check,
                datasets=[parent_dataset_type],
            )
            builder = self._makeQueryBuilder(summary)
            # Add the dataset subquery to the query, telling the QueryBuilder
            # to include the rank of the selected collection in the results
            # only if we need to findFirst. Note that if any of the
            # collections are actually wildcard expressions, and
            # findFirst=True, this will raise TypeError for us.
            builder.joinDataset(parent_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst)
            query = builder.finish()
            parent_results.append(
                queries.ParentDatasetQueryResults(
                    query, parent_dataset_type, components=components_for_parent
                )
            )
        if not parent_results:
            doomed_by.extend(
                f"No registered dataset type matching {t!r} found, so no matching datasets can "
                "exist in any collection."
                for t in ensure_iterable(datasetType)
            )
            return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
        elif len(parent_results) == 1:
            return parent_results[0]
        else:
            return queries.ChainedDatasetQueryResults(parent_results)
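
    # Illustrative sketch (not part of the original file): a find-first
    # dataset query restricted by a ``where`` expression with a bind value.
    # The dataset type, collection, and dimension values are hypothetical.
    #
    #     refs = registry.queryDatasets(
    #         "calexp",
    #         collections=["u/someone/chain"],
    #         where="instrument = 'DummyCam' AND visit = visit_id",
    #         bind={"visit_id": 1234},
    #         findFirst=True,
    #     )
    #     for ref in refs:
    #         print(ref.dataId, ref.run)
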

    def queryDataIds(
        self,
        dimensions: Iterable[Dimension | str] | Dimension | str,
        *,
        dataId: DataId | None = None,
        datasets: Any = None,
        collections: CollectionArgType | None = None,
        where: str = "",
        components: bool | None = None,
        bind: Mapping[str, Any] | None = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DataCoordinateQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        dimensions = ensure_iterable(dimensions)
        requestedDimensions = self.dimensions.extract(dimensions)
        doomed_by: list[str] = []
        data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
        dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
            datasets, collections, components, doomed_by=doomed_by
        )
        if collection_wildcard is not None and collection_wildcard.empty():
            doomed_by.append("No data coordinates can be found because collection list is empty.")
        summary = queries.QuerySummary(
            requested=requestedDimensions,
            column_types=self._managers.column_types,
            data_id=data_id,
            expression=where,
            bind=bind,
            defaults=self.defaults.dataId,
            check=check,
            datasets=dataset_composition.keys(),
        )
        builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
        for datasetType in dataset_composition:
            builder.joinDataset(datasetType, collection_wildcard, isResult=False)
        query = builder.finish()

        return queries.DataCoordinateQueryResults(query)
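
    # Illustrative sketch (not part of the original file): finding
    # visit/detector data IDs constrained by the existence of a dataset.
    # Names and the ``where`` clause are hypothetical.
    #
    #     data_ids = registry.queryDataIds(
    #         ["visit", "detector"],
    #         datasets="raw",
    #         collections="DummyCam/raw/all",
    #         where="detector.purpose = 'SCIENCE'",
    #     )
    #     for data_id in data_ids.expanded():
    #         print(data_id["visit"], data_id["detector"])
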

    def queryDimensionRecords(
        self,
        element: DimensionElement | str,
        *,
        dataId: DataId | None = None,
        datasets: Any = None,
        collections: CollectionArgType | None = None,
        where: str = "",
        components: bool | None = None,
        bind: Mapping[str, Any] | None = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DimensionRecordQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if not isinstance(element, DimensionElement):
            try:
                element = self.dimensions[element]
            except KeyError as e:
                raise DimensionNameError(
                    f"No such dimension '{element}', available dimensions: "
                    + str(self.dimensions.getStaticElements())
                ) from e
        doomed_by: list[str] = []
        data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
        dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
            datasets, collections, components, doomed_by=doomed_by
        )
        if collection_wildcard is not None and collection_wildcard.empty():
            doomed_by.append("No dimension records can be found because collection list is empty.")
        summary = queries.QuerySummary(
            requested=element.graph,
            column_types=self._managers.column_types,
            data_id=data_id,
            expression=where,
            bind=bind,
            defaults=self.defaults.dataId,
            check=check,
            datasets=dataset_composition.keys(),
        )
        builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
        for datasetType in dataset_composition:
            builder.joinDataset(datasetType, collection_wildcard, isResult=False)
        query = builder.finish().with_record_columns(element)
        return queries.DatabaseDimensionRecordQueryResults(query, element)
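
    # Illustrative sketch (not part of the original file): fetching exposure
    # records matching a ``where`` expression. The instrument name and
    # day_obs value are hypothetical.
    #
    #     records = registry.queryDimensionRecords(
    #         "exposure",
    #         where="instrument = 'DummyCam' AND exposure.day_obs = 20231001",
    #     )
    #     for record in records:
    #         print(record.id, record.timespan)
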

    def queryDatasetAssociations(
        self,
        datasetType: str | DatasetType,
        collections: CollectionArgType | None = ...,
        *,
        collectionTypes: Iterable[CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
    ) -> Iterator[DatasetAssociation]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if collections is None:
            if not self.defaults.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to queryDatasetAssociations, "
                    "and no defaults from registry construction."
                )
            collections = self.defaults.collections
        collection_wildcard = CollectionWildcard.from_expression(collections)
        backend = queries.SqlQueryBackend(self._db, self._managers)
        parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False)
        timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan")
        collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
        for parent_collection_record in backend.resolve_collection_wildcard(
            collection_wildcard,
            collection_types=frozenset(collectionTypes),
            flatten_chains=flattenChains,
        ):
            # Resolve this possibly-chained collection into a list of
            # non-CHAINED collections that actually hold datasets of this
            # type.
            candidate_collection_records = backend.resolve_dataset_collections(
                parent_dataset_type,
                CollectionWildcard.from_names([parent_collection_record.name]),
                allow_calibration_collections=True,
                governor_constraints={},
            )
            if not candidate_collection_records:
                continue
            with backend.context() as context:
                relation = backend.make_dataset_query_relation(
                    parent_dataset_type,
                    candidate_collection_records,
                    columns={"dataset_id", "run", "timespan", "collection"},
                    context=context,
                )
                reader = queries.DatasetRefReader(
                    parent_dataset_type,
                    translate_collection=lambda k: self._managers.collections[k].name,
                    full=False,
                )
                for row in context.fetch_iterable(relation):
                    ref = reader.read(row)
                    collection_record = self._managers.collections[row[collection_tag]]
                    if collection_record.type is CollectionType.CALIBRATION:
                        timespan = row[timespan_tag]
                    else:
                        # For backwards compatibility and (possibly?) user
                        # convenience we continue to define the timespan of a
                        # DatasetAssociation row for a non-CALIBRATION
                        # collection to be None rather than a fully unbounded
                        # timespan.
                        timespan = None
                    yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan)
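
    # Illustrative sketch (not part of the original file): listing where a
    # calibration dataset type is associated, with validity ranges for
    # CALIBRATION collections. The dataset type name is hypothetical;
    # ``collections=...`` means "all collections".
    #
    #     for assoc in registry.queryDatasetAssociations(
    #         "bias", collections=..., collectionTypes={CollectionType.CALIBRATION}
    #     ):
    #         print(assoc.collection, assoc.ref.dataId, assoc.timespan)
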

    @property
    def obsCoreTableManager(self) -> ObsCoreTableManager | None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.obscore

    storageClasses: StorageClassFactory
    """All storage classes known to the registry (`StorageClassFactory`).
    """