Coverage for python/lsst/daf/butler/registries/sql.py: 13%
503 statements
coverage.py v6.5.0, created at 2023-03-31 02:41 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("SqlRegistry",)
26import contextlib
27import logging
28import warnings
29from typing import (
30 TYPE_CHECKING,
31 Any,
32 Dict,
33 Iterable,
34 Iterator,
35 List,
36 Literal,
37 Mapping,
38 Optional,
39 Sequence,
40 Set,
41 Union,
42 cast,
43)
45import sqlalchemy
46from lsst.daf.relation import LeafRelation, Relation
47from lsst.resources import ResourcePathExpression
48from lsst.utils.iteration import ensure_iterable
50from ..core import (
51 Config,
52 DataCoordinate,
53 DataId,
54 DatasetAssociation,
55 DatasetColumnTag,
56 DatasetId,
57 DatasetRef,
58 DatasetType,
59 Dimension,
60 DimensionConfig,
61 DimensionElement,
62 DimensionGraph,
63 DimensionRecord,
64 DimensionUniverse,
65 NamedKeyMapping,
66 NameLookupMapping,
67 Progress,
68 StorageClassFactory,
69 Timespan,
70 ddl,
71)
72from ..core.utils import transactional
73from ..registry import (
74 ArgumentError,
75 CollectionExpressionError,
76 CollectionSummary,
77 CollectionType,
78 CollectionTypeError,
79 ConflictingDefinitionError,
80 DataIdValueError,
81 DatasetTypeError,
82 DimensionNameError,
83 InconsistentDataIdError,
84 NoDefaultCollectionError,
85 OrphanedRecordError,
86 Registry,
87 RegistryConfig,
88 RegistryDefaults,
89 queries,
90)
91from ..registry.interfaces import ChainedCollectionRecord, DatasetIdFactory, DatasetIdGenEnum, RunRecord
92from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes
93from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard
95if TYPE_CHECKING:
96 from .._butlerConfig import ButlerConfig
97 from ..registry.interfaces import (
98 CollectionRecord,
99 Database,
100 DatastoreRegistryBridgeManager,
101 ObsCoreTableManager,
102 )
105_LOG = logging.getLogger(__name__)
108class SqlRegistry(Registry):
109 """Registry implementation based on SQLAlchemy.
111 Parameters
112 ----------
113 database : `Database`
114 Database instance that stores the Registry contents.
115 defaults : `RegistryDefaults`
116 Default collection search path and/or output `~CollectionType.RUN`
117 collection.
118 managers : `RegistryManagerInstances`
119 All the managers required for this registry.
120 """
122 defaultConfigFile: Optional[str] = None
123 """Path to configuration defaults. Accessed within the ``configs`` resource
124 or relative to a search path. Can be `None` if no defaults are specified.
125 """
127 @classmethod
128 def createFromConfig(
129 cls,
130 config: Optional[Union[RegistryConfig, str]] = None,
131 dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
132 butlerRoot: Optional[ResourcePathExpression] = None,
133 ) -> Registry:
134 """Create registry database and return `SqlRegistry` instance.
136 This method initializes database contents; the database must be empty
137 prior to calling it.
139 Parameters
140 ----------
141 config : `RegistryConfig` or `str`, optional
142 Registry configuration; if missing, the default configuration will
143 be loaded from ``registry.yaml``.
144 dimensionConfig : `DimensionConfig` or `str`, optional
145 Dimensions configuration; if missing, the default configuration
146 will be loaded from ``dimensions.yaml``.
147 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
148 Path to the repository root this `SqlRegistry` will manage.
150 Returns
151 -------
152 registry : `SqlRegistry`
153 A new `SqlRegistry` instance.
154 """
155 config = cls.forceRegistryConfig(config)
156 config.replaceRoot(butlerRoot)
158 if isinstance(dimensionConfig, str):
159 dimensionConfig = DimensionConfig(dimensionConfig)
160 elif dimensionConfig is None:
161 dimensionConfig = DimensionConfig()
162 elif not isinstance(dimensionConfig, DimensionConfig):
163 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}")
165 DatabaseClass = config.getDatabaseClass()
166 database = DatabaseClass.fromUri(
167 str(config.connectionString), origin=config.get("origin", 0), namespace=config.get("namespace")
168 )
169 managerTypes = RegistryManagerTypes.fromConfig(config)
170 managers = managerTypes.makeRepo(database, dimensionConfig)
171 return cls(database, RegistryDefaults(), managers)
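# --- Editor's illustrative sketch; not part of the original sql.py ----------
# A minimal sketch of creating a brand-new registry in an empty database,
# assuming a SQLite connection string under the "db" config key; the
# repository path is hypothetical.
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import RegistryConfig

config = RegistryConfig({"db": "sqlite:////tmp/demo-repo/gen3.sqlite3"})
registry = SqlRegistry.createFromConfig(config, butlerRoot="/tmp/demo-repo")
assert registry.isWriteable()
# -----------------------------------------------------------------------------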
173 @classmethod
174 def fromConfig(
175 cls,
176 config: Union[ButlerConfig, RegistryConfig, Config, str],
177 butlerRoot: Optional[ResourcePathExpression] = None,
178 writeable: bool = True,
179 defaults: Optional[RegistryDefaults] = None,
180 ) -> Registry:
181 """Create `Registry` subclass instance from `config`.
183 Registry database must be initialized prior to calling this method.
185 Parameters
186 ----------
187 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
188 Registry configuration.
189 butlerRoot : `lsst.resources.ResourcePathExpression`, optional
190 Path to the repository root this `Registry` will manage.
191 writeable : `bool`, optional
192 If `True` (default) create a read-write connection to the database.
193 defaults : `RegistryDefaults`, optional
194 Default collection search path and/or output `~CollectionType.RUN`
195 collection.
197 Returns
198 -------
199 registry : `SqlRegistry` (subclass)
200 A new `SqlRegistry` subclass instance.
201 """
202 config = cls.forceRegistryConfig(config)
203 config.replaceRoot(butlerRoot)
204 DatabaseClass = config.getDatabaseClass()
205 database = DatabaseClass.fromUri(
206 config.connectionString.render_as_string(hide_password=False),
207 origin=config.get("origin", 0),
208 namespace=config.get("namespace"),
209 writeable=writeable,
210 )
211 managerTypes = RegistryManagerTypes.fromConfig(config)
212 with database.session():
213 managers = managerTypes.loadRepo(database)
214 if defaults is None:
215 defaults = RegistryDefaults()
216 return cls(database, defaults, managers)
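# --- Editor's illustrative sketch; not part of the original sql.py ----------
# A sketch of opening an already-initialized repository read-only with default
# collections; the collection and run names are hypothetical and the
# RegistryDefaults keyword names are assumed.
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults

config = RegistryConfig({"db": "sqlite:////tmp/demo-repo/gen3.sqlite3"})
defaults = RegistryDefaults(collections=["HSC/defaults"], run="u/demo/run")
registry = SqlRegistry.fromConfig(config, writeable=False, defaults=defaults)
# -----------------------------------------------------------------------------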
218 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances):
219 self._db = database
220 self._managers = managers
221 self.storageClasses = StorageClassFactory()
222 # Intentionally invoke property setter to initialize defaults. This
223 # can only be done after most of the rest of Registry has already been
224 # initialized, and must be done before the property getter is used.
225 self.defaults = defaults
226 # In the future DatasetIdFactory may become configurable and this
227 # instance will need to be shared with datasets manager.
228 self.datasetIdFactory = DatasetIdFactory()
230 def __str__(self) -> str:
231 return str(self._db)
233 def __repr__(self) -> str:
234 return f"SqlRegistry({self._db!r}, {self.dimensions!r})"
236 def isWriteable(self) -> bool:
237 # Docstring inherited from lsst.daf.butler.registry.Registry
238 return self._db.isWriteable()
240 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
241 # Docstring inherited from lsst.daf.butler.registry.Registry
242 if defaults is None:
243 # No need to copy, because `RegistryDefaults` is immutable; we
244 # effectively copy on write.
245 defaults = self.defaults
246 return type(self)(self._db, defaults, self._managers)
248 @property
249 def dimensions(self) -> DimensionUniverse:
250 # Docstring inherited from lsst.daf.butler.registry.Registry
251 return self._managers.dimensions.universe
253 def refresh(self) -> None:
254 # Docstring inherited from lsst.daf.butler.registry.Registry
255 with self._db.transaction():
256 self._managers.refresh()
258 @contextlib.contextmanager
259 def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
260 # Docstring inherited from lsst.daf.butler.registry.Registry
261 try:
262 with self._db.transaction(savepoint=savepoint):
263 yield
264 except BaseException:
265 # TODO: this clears the caches sometimes when we wouldn't actually
266 # need to. Can we avoid that?
267 self._managers.dimensions.clearCaches()
268 raise
270 def resetConnectionPool(self) -> None:
271 """Reset SQLAlchemy connection pool for `SqlRegistry` database.
273 This operation is useful when using the registry with fork-based
274 multiprocessing. To use the registry across a fork boundary, make
275 sure that there are no currently active connections (no session or
276 transaction in progress) and that the connection pool is reset using
277 this method. It should be called by the child process immediately
278 after the fork.
279 """
280 self._db._engine.dispose()
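# --- Editor's illustrative sketch; not part of the original sql.py ----------
# Fork-based multiprocessing pattern described in the docstring above: each
# child resets the connection pool before touching the registry.  ``registry``
# is assumed to be an existing SqlRegistry with no open transaction, and the
# collection name is hypothetical.
import multiprocessing

def _child_task(registry: SqlRegistry) -> None:
    registry.resetConnectionPool()  # must run in the child, right after fork
    print(registry.getCollectionType("HSC/defaults"))

ctx = multiprocessing.get_context("fork")
proc = ctx.Process(target=_child_task, args=(registry,))
proc.start()
proc.join()
# -----------------------------------------------------------------------------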
282 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
283 """Add an opaque (to the `Registry`) table for use by a `Datastore` or
284 other data repository client.
286 Opaque table records can be added via `insertOpaqueData`, retrieved via
287 `fetchOpaqueData`, and removed via `deleteOpaqueData`.
289 Parameters
290 ----------
291 tableName : `str`
292 Logical name of the opaque table. This may differ from the
293 actual name used in the database by a prefix and/or suffix.
294 spec : `ddl.TableSpec`
295 Specification for the table to be added.
296 """
297 self._managers.opaque.register(tableName, spec)
299 @transactional
300 def insertOpaqueData(self, tableName: str, *data: dict) -> None:
301 """Insert records into an opaque table.
303 Parameters
304 ----------
305 tableName : `str`
306 Logical name of the opaque table. Must match the name used in a
307 previous call to `registerOpaqueTable`.
308 data
309 Each additional positional argument is a dictionary that represents
310 a single row to be added.
311 """
312 self._managers.opaque[tableName].insert(*data)
314 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]:
315 """Retrieve records from an opaque table.
317 Parameters
318 ----------
319 tableName : `str`
320 Logical name of the opaque table. Must match the name used in a
321 previous call to `registerOpaqueTable`.
322 where
323 Additional keyword arguments are interpreted as equality
324 constraints that restrict the returned rows (combined with AND);
325 keyword arguments are column names and values are the values they
326 must have.
328 Yields
329 ------
330 row : `dict`
331 A dictionary representing a single result row.
332 """
333 yield from self._managers.opaque[tableName].fetch(**where)
335 @transactional
336 def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
337 """Remove records from an opaque table.
339 Parameters
340 ----------
341 tableName : `str`
342 Logical name of the opaque table. Must match the name used in a
343 previous call to `registerOpaqueTable`.
344 where
345 Additional keyword arguments are interpreted as equality
346 constraints that restrict the deleted rows (combined with AND);
347 keyword arguments are column names and values are the values they
348 must have.
349 """
350 self._managers.opaque[tableName].delete(where.keys(), where)
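# --- Editor's illustrative sketch; not part of the original sql.py ----------
# Round trip through the opaque-table API defined above.  The table name, the
# ddl.FieldSpec arguments, and the row contents are assumptions chosen only
# for demonstration; ``registry`` is an existing SqlRegistry.
import sqlalchemy
from lsst.daf.butler.core import ddl

spec = ddl.TableSpec(
    fields=[
        ddl.FieldSpec(name="dataset_id", dtype=sqlalchemy.String, length=32, primaryKey=True),
        ddl.FieldSpec(name="path", dtype=sqlalchemy.String, length=256, nullable=False),
    ]
)
registry.registerOpaqueTable("demo_datastore_records", spec)
registry.insertOpaqueData("demo_datastore_records", {"dataset_id": "abc123", "path": "a/b.fits"})
rows = list(registry.fetchOpaqueData("demo_datastore_records", dataset_id="abc123"))
registry.deleteOpaqueData("demo_datastore_records", dataset_id="abc123")
# -----------------------------------------------------------------------------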
352 def registerCollection(
353 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None
354 ) -> bool:
355 # Docstring inherited from lsst.daf.butler.registry.Registry
356 _, registered = self._managers.collections.register(name, type, doc=doc)
357 return registered
359 def getCollectionType(self, name: str) -> CollectionType:
360 # Docstring inherited from lsst.daf.butler.registry.Registry
361 return self._managers.collections.find(name).type
363 def _get_collection_record(self, name: str) -> CollectionRecord:
364 # Docstring inherited from lsst.daf.butler.registry.Registry
365 return self._managers.collections.find(name)
367 def registerRun(self, name: str, doc: Optional[str] = None) -> bool:
368 # Docstring inherited from lsst.daf.butler.registry.Registry
369 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc)
370 return registered
372 @transactional
373 def removeCollection(self, name: str) -> None:
374 # Docstring inherited from lsst.daf.butler.registry.Registry
375 self._managers.collections.remove(name)
377 def getCollectionChain(self, parent: str) -> tuple[str, ...]:
378 # Docstring inherited from lsst.daf.butler.registry.Registry
379 record = self._managers.collections.find(parent)
380 if record.type is not CollectionType.CHAINED:
381 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
382 assert isinstance(record, ChainedCollectionRecord)
383 return record.children
385 @transactional
386 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
387 # Docstring inherited from lsst.daf.butler.registry.Registry
388 record = self._managers.collections.find(parent)
389 if record.type is not CollectionType.CHAINED:
390 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
391 assert isinstance(record, ChainedCollectionRecord)
392 children = CollectionWildcard.from_expression(children).require_ordered()
393 if children != record.children or flatten:
394 record.update(self._managers.collections, children, flatten=flatten)
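# --- Editor's illustrative sketch; not part of the original sql.py ----------
# Building a CHAINED collection from two RUN collections with the methods
# above; all collection names are hypothetical.
registry.registerRun("u/demo/run1")
registry.registerRun("u/demo/run2")
registry.registerCollection("u/demo/chain", CollectionType.CHAINED)
registry.setCollectionChain("u/demo/chain", ["u/demo/run2", "u/demo/run1"])
assert registry.getCollectionChain("u/demo/chain") == ("u/demo/run2", "u/demo/run1")
# -----------------------------------------------------------------------------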
396 def getCollectionParentChains(self, collection: str) -> Set[str]:
397 # Docstring inherited from lsst.daf.butler.registry.Registry
398 return {
399 record.name
400 for record in self._managers.collections.getParentChains(
401 self._managers.collections.find(collection).key
402 )
403 }
405 def getCollectionDocumentation(self, collection: str) -> Optional[str]:
406 # Docstring inherited from lsst.daf.butler.registry.Registry
407 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key)
409 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
410 # Docstring inherited from lsst.daf.butler.registry.Registry
411 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc)
413 def getCollectionSummary(self, collection: str) -> CollectionSummary:
414 # Docstring inherited from lsst.daf.butler.registry.Registry
415 record = self._managers.collections.find(collection)
416 return self._managers.datasets.getCollectionSummary(record)
418 def registerDatasetType(self, datasetType: DatasetType) -> bool:
419 # Docstring inherited from lsst.daf.butler.registry.Registry
420 _, inserted = self._managers.datasets.register(datasetType)
421 return inserted
423 def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
424 # Docstring inherited from lsst.daf.butler.registry.Registry
426 for datasetTypeExpression in ensure_iterable(name):
427 datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression))
428 if not datasetTypes:
429 _LOG.info("Dataset type %r not defined", datasetTypeExpression)
430 else:
431 for datasetType in datasetTypes:
432 self._managers.datasets.remove(datasetType.name)
433 _LOG.info("Removed dataset type %r", datasetType.name)
435 def getDatasetType(self, name: str) -> DatasetType:
436 # Docstring inherited from lsst.daf.butler.registry.Registry
437 parent_name, component = DatasetType.splitDatasetTypeName(name)
438 storage = self._managers.datasets[parent_name]
439 if component is None:
440 return storage.datasetType
441 else:
442 return storage.datasetType.makeComponentDatasetType(component)
444 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
445 # Docstring inherited from lsst.daf.butler.registry.Registry
446 return self._managers.datasets.supportsIdGenerationMode(mode)
448 def findDataset(
449 self,
450 datasetType: Union[DatasetType, str],
451 dataId: Optional[DataId] = None,
452 *,
453 collections: Any = None,
454 timespan: Optional[Timespan] = None,
455 **kwargs: Any,
456 ) -> Optional[DatasetRef]:
457 # Docstring inherited from lsst.daf.butler.registry.Registry
458 if collections is None:
459 if not self.defaults.collections:
460 raise NoDefaultCollectionError(
461 "No collections provided to findDataset, and no defaults from registry construction."
462 )
463 collections = self.defaults.collections
464 backend = queries.SqlQueryBackend(self._db, self._managers)
465 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True)
466 matched_collections = backend.resolve_collection_wildcard(collection_wildcard)
467 parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard(
468 datasetType, components_deprecated=False
469 )
470 if len(components) > 1:
471 raise DatasetTypeError(
472 f"findDataset requires exactly one dataset type; got multiple components {components} "
473 f"for parent dataset type {parent_dataset_type.name}."
474 )
475 component = components[0]
476 dataId = DataCoordinate.standardize(
477 dataId,
478 graph=parent_dataset_type.dimensions,
479 universe=self.dimensions,
480 defaults=self.defaults.dataId,
481 **kwargs,
482 )
483 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names}
484 (filtered_collections,) = backend.filter_dataset_collections(
485 [parent_dataset_type],
486 matched_collections,
487 governor_constraints=governor_constraints,
488 ).values()
489 if not filtered_collections:
490 return None
491 if timespan is None:
492 filtered_collections = [
493 collection_record
494 for collection_record in filtered_collections
495 if collection_record.type is not CollectionType.CALIBRATION
496 ]
497 if filtered_collections:
498 requested_columns = {"dataset_id", "run", "collection"}
499 with backend.context() as context:
500 predicate = context.make_data_coordinate_predicate(
501 dataId.subset(parent_dataset_type.dimensions), full=False
502 )
503 if timespan is not None:
504 requested_columns.add("timespan")
505 predicate = predicate.logical_and(
506 context.make_timespan_overlap_predicate(
507 DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan
508 )
509 )
510 relation = backend.make_dataset_query_relation(
511 parent_dataset_type, filtered_collections, requested_columns, context
512 ).with_rows_satisfying(predicate)
513 rows = list(context.fetch_iterable(relation))
514 else:
515 rows = []
516 if not rows:
517 return None
518 elif len(rows) == 1:
519 best_row = rows[0]
520 else:
521 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)}
522 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
523 row_iter = iter(rows)
524 best_row = next(row_iter)
525 best_rank = rank_by_collection_key[best_row[collection_tag]]
526 have_tie = False
527 for row in row_iter:
528 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank:
529 best_row = row
530 best_rank = rank
531 have_tie = False
532 elif rank == best_rank:
533 have_tie = True
534 assert timespan is not None, "Rank ties should be impossible given DB constraints."
535 if have_tie:
536 raise LookupError(
537 f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections "
538 f"{collection_wildcard.strings} with timespan {timespan}."
539 )
540 reader = queries.DatasetRefReader(
541 parent_dataset_type,
542 translate_collection=lambda k: self._managers.collections[k].name,
543 )
544 ref = reader.read(best_row, data_id=dataId)
545 if component is not None:
546 ref = ref.makeComponentRef(component)
547 return ref
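# --- Editor's illustrative sketch; not part of the original sql.py ----------
# Looking up a single dataset with findDataset.  The dataset type name, data
# ID keys, and collection names are hypothetical; a timespan is only needed
# when the search includes CALIBRATION collections.
ref = registry.findDataset(
    "flat",
    instrument="HSC",
    detector=42,
    physical_filter="HSC-R",
    collections=["HSC/calib"],
    timespan=observation_timespan,  # a Timespan covering the observation (assumed defined)
)
if ref is None:
    print("No matching dataset found.")
# -----------------------------------------------------------------------------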
549 @transactional
550 def insertDatasets(
551 self,
552 datasetType: Union[DatasetType, str],
553 dataIds: Iterable[DataId],
554 run: Optional[str] = None,
555 expand: bool = True,
556 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
557 ) -> List[DatasetRef]:
558 # Docstring inherited from lsst.daf.butler.registry.Registry
559 if isinstance(datasetType, DatasetType):
560 storage = self._managers.datasets.find(datasetType.name)
561 if storage is None:
562 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
563 else:
564 storage = self._managers.datasets.find(datasetType)
565 if storage is None:
566 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.")
567 if run is None:
568 if self.defaults.run is None:
569 raise NoDefaultCollectionError(
570 "No run provided to insertDatasets, and no default from registry construction."
571 )
572 run = self.defaults.run
573 runRecord = self._managers.collections.find(run)
574 if runRecord.type is not CollectionType.RUN:
575 raise CollectionTypeError(
576 f"Given collection is of type {runRecord.type.name}; RUN collection required."
577 )
578 assert isinstance(runRecord, RunRecord)
579 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
580 if expand:
581 expandedDataIds = [
582 self.expandDataId(dataId, graph=storage.datasetType.dimensions)
583 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs")
584 ]
585 else:
586 expandedDataIds = [
587 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds
588 ]
589 try:
590 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode))
591 if self._managers.obscore:
592 context = queries.SqlQueryContext(self._db, self._managers.column_types)
593 self._managers.obscore.add_datasets(refs, context)
594 except sqlalchemy.exc.IntegrityError as err:
595 raise ConflictingDefinitionError(
596 "A database constraint failure was triggered by inserting "
597 f"one or more datasets of type {storage.datasetType} into "
598 f"collection '{run}'. "
599 "This probably means a dataset with the same data ID "
600 "and dataset type already exists, but it may also mean a "
601 "dimension row is missing."
602 ) from err
603 return refs
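# --- Editor's illustrative sketch; not part of the original sql.py ----------
# Registering a dataset type and inserting datasets into a RUN collection.
# The dataset type definition, storage class, and data IDs are assumptions
# chosen for demonstration.
datasetType = DatasetType(
    "demo_catalog",
    dimensions=["instrument", "visit", "detector"],
    storageClass="SourceCatalog",
    universe=registry.dimensions,
)
registry.registerDatasetType(datasetType)
registry.registerRun("u/demo/run1")
refs = registry.insertDatasets(
    "demo_catalog",
    dataIds=[{"instrument": "HSC", "visit": 903334, "detector": 42}],
    run="u/demo/run1",
)
# -----------------------------------------------------------------------------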
605 @transactional
606 def _importDatasets(
607 self,
608 datasets: Iterable[DatasetRef],
609 expand: bool = True,
610 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
611 reuseIds: bool = False,
612 ) -> List[DatasetRef]:
613 # Docstring inherited from lsst.daf.butler.registry.Registry
614 datasets = list(datasets)
615 if not datasets:
616 # nothing to do
617 return []
619 # find dataset type
620 datasetTypes = set(dataset.datasetType for dataset in datasets)
621 if len(datasetTypes) != 1:
622 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}")
623 datasetType = datasetTypes.pop()
625 # get storage handler for this dataset type
626 storage = self._managers.datasets.find(datasetType.name)
627 if storage is None:
628 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
630 # find run name
631 runs = set(dataset.run for dataset in datasets)
632 if len(runs) != 1:
633 raise ValueError(f"Multiple run names in input datasets: {runs}")
634 run = runs.pop()
635 if run is None:
636 if self.defaults.run is None:
637 raise NoDefaultCollectionError(
638 "No run provided to ingestDatasets, and no default from registry construction."
639 )
640 run = self.defaults.run
642 runRecord = self._managers.collections.find(run)
643 if runRecord.type is not CollectionType.RUN:
644 raise CollectionTypeError(
645 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};"
646 " RUN collection required."
647 )
648 assert isinstance(runRecord, RunRecord)
650 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
651 if expand:
652 expandedDatasets = [
653 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions))
654 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs")
655 ]
656 else:
657 expandedDatasets = [
658 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True)
659 for dataset in datasets
660 ]
662 try:
663 refs = list(storage.import_(runRecord, expandedDatasets, idGenerationMode, reuseIds))
664 if self._managers.obscore:
665 context = queries.SqlQueryContext(self._db, self._managers.column_types)
666 self._managers.obscore.add_datasets(refs, context)
667 except sqlalchemy.exc.IntegrityError as err:
668 raise ConflictingDefinitionError(
669 "A database constraint failure was triggered by inserting "
670 f"one or more datasets of type {storage.datasetType} into "
671 f"collection '{run}'. "
672 "This probably means a dataset with the same data ID "
673 "and dataset type already exists, but it may also mean a "
674 "dimension row is missing."
675 ) from err
676 return refs
678 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
679 # Docstring inherited from lsst.daf.butler.registry.Registry
680 return self._managers.datasets.getDatasetRef(id)
682 @transactional
683 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
684 # Docstring inherited from lsst.daf.butler.registry.Registry
685 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG)
686 for datasetType, refsForType in progress.iter_item_chunks(
687 DatasetRef.groupByType(refs).items(), desc="Removing datasets by type"
688 ):
689 storage = self._managers.datasets[datasetType.name]
690 try:
691 storage.delete(refsForType)
692 except sqlalchemy.exc.IntegrityError as err:
693 raise OrphanedRecordError(
694 "One or more datasets is still present in one or more Datastores."
695 ) from err
697 @transactional
698 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
699 # Docstring inherited from lsst.daf.butler.registry.Registry
700 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG)
701 collectionRecord = self._managers.collections.find(collection)
702 if collectionRecord.type is not CollectionType.TAGGED:
703 raise CollectionTypeError(
704 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED."
705 )
706 for datasetType, refsForType in progress.iter_item_chunks(
707 DatasetRef.groupByType(refs).items(), desc="Associating datasets by type"
708 ):
709 storage = self._managers.datasets[datasetType.name]
710 try:
711 storage.associate(collectionRecord, refsForType)
712 if self._managers.obscore:
713 # If a TAGGED collection is being monitored by the ObsCore
714 # manager, then we may need to save the dataset.
715 context = queries.SqlQueryContext(self._db, self._managers.column_types)
716 self._managers.obscore.associate(refsForType, collectionRecord, context)
717 except sqlalchemy.exc.IntegrityError as err:
718 raise ConflictingDefinitionError(
719 f"Constraint violation while associating dataset of type {datasetType.name} with "
720 f"collection {collection}. This probably means that one or more datasets with the same "
721 "dataset type and data ID already exist in the collection, but it may also indicate "
722 "that the datasets do not exist."
723 ) from err
725 @transactional
726 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
727 # Docstring inherited from lsst.daf.butler.registry.Registry
728 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG)
729 collectionRecord = self._managers.collections.find(collection)
730 if collectionRecord.type is not CollectionType.TAGGED:
731 raise CollectionTypeError(
732 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED."
733 )
734 for datasetType, refsForType in progress.iter_item_chunks(
735 DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type"
736 ):
737 storage = self._managers.datasets[datasetType.name]
738 storage.disassociate(collectionRecord, refsForType)
739 if self._managers.obscore:
740 self._managers.obscore.disassociate(refsForType, collectionRecord)
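# --- Editor's illustrative sketch; not part of the original sql.py ----------
# Tagging and untagging datasets with the association methods above; the
# collection name is hypothetical and ``refs`` is assumed to be an iterable
# of resolved DatasetRefs (e.g. from queryDatasets).
registry.registerCollection("u/demo/tagged", CollectionType.TAGGED)
registry.associate("u/demo/tagged", refs)
registry.disassociate("u/demo/tagged", refs)
# -----------------------------------------------------------------------------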
742 @transactional
743 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
744 # Docstring inherited from lsst.daf.butler.registry.Registry
745 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG)
746 collectionRecord = self._managers.collections.find(collection)
747 for datasetType, refsForType in progress.iter_item_chunks(
748 DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type"
749 ):
750 storage = self._managers.datasets[datasetType.name]
751 storage.certify(
752 collectionRecord,
753 refsForType,
754 timespan,
755 context=queries.SqlQueryContext(self._db, self._managers.column_types),
756 )
758 @transactional
759 def decertify(
760 self,
761 collection: str,
762 datasetType: Union[str, DatasetType],
763 timespan: Timespan,
764 *,
765 dataIds: Optional[Iterable[DataId]] = None,
766 ) -> None:
767 # Docstring inherited from lsst.daf.butler.registry.Registry
768 collectionRecord = self._managers.collections.find(collection)
769 if isinstance(datasetType, str):
770 storage = self._managers.datasets[datasetType]
771 else:
772 storage = self._managers.datasets[datasetType.name]
773 standardizedDataIds = None
774 if dataIds is not None:
775 standardizedDataIds = [
776 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds
777 ]
778 storage.decertify(
779 collectionRecord,
780 timespan,
781 dataIds=standardizedDataIds,
782 context=queries.SqlQueryContext(self._db, self._managers.column_types),
783 )
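# --- Editor's illustrative sketch; not part of the original sql.py ----------
# Certifying calibration datasets for a validity range and later decertifying
# part of it.  The collection name, ``refs``, and dates are assumptions, and
# an unbounded end is assumed to be expressible with None.
from astropy.time import Time

registry.registerCollection("u/demo/calib", CollectionType.CALIBRATION)
validity = Timespan(Time("2023-01-01", scale="tai"), Time("2023-07-01", scale="tai"))
registry.certify("u/demo/calib", refs, validity)
registry.decertify("u/demo/calib", "flat", Timespan(Time("2023-03-01", scale="tai"), None))
# -----------------------------------------------------------------------------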
785 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
786 """Return an object that allows a new `Datastore` instance to
787 communicate with this `Registry`.
789 Returns
790 -------
791 manager : `DatastoreRegistryBridgeManager`
792 Object that mediates communication between this `Registry` and its
793 associated datastores.
794 """
795 return self._managers.datastores
797 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
798 # Docstring inherited from lsst.daf.butler.registry.Registry
799 return self._managers.datastores.findDatastores(ref)
801 def expandDataId(
802 self,
803 dataId: Optional[DataId] = None,
804 *,
805 graph: Optional[DimensionGraph] = None,
806 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
807 withDefaults: bool = True,
808 **kwargs: Any,
809 ) -> DataCoordinate:
810 # Docstring inherited from lsst.daf.butler.registry.Registry
811 if not withDefaults:
812 defaults = None
813 else:
814 defaults = self.defaults.dataId
815 try:
816 standardized = DataCoordinate.standardize(
817 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs
818 )
819 except KeyError as exc:
820 # This means either a kwarg has an unrecognized name or a required
821 # dimension is missing.
822 raise DimensionNameError(str(exc)) from exc
823 if standardized.hasRecords():
824 return standardized
825 if records is None:
826 records = {}
827 elif isinstance(records, NamedKeyMapping):
828 records = records.byName()
829 else:
830 records = dict(records)
831 if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
832 records.update(dataId.records.byName())
833 keys = standardized.byName()
834 context = queries.SqlQueryContext(self._db, self._managers.column_types)
835 for element in standardized.graph.primaryKeyTraversalOrder:
836 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL
837 if record is ...:
838 if isinstance(element, Dimension) and keys.get(element.name) is None:
839 if element in standardized.graph.required:
840 raise DimensionNameError(
841 f"No value or null value for required dimension {element.name}."
842 )
843 keys[element.name] = None
844 record = None
845 else:
846 storage = self._managers.dimensions[element]
847 record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context)
848 records[element.name] = record
849 if record is not None:
850 for d in element.implied:
851 value = getattr(record, d.name)
852 if keys.setdefault(d.name, value) != value:
853 raise InconsistentDataIdError(
854 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
855 f"but {element.name} implies {d.name}={value!r}."
856 )
857 else:
858 if element in standardized.graph.required:
859 raise DataIdValueError(
860 f"Could not fetch record for required dimension {element.name} via keys {keys}."
861 )
862 if element.alwaysJoin:
863 raise InconsistentDataIdError(
864 f"Could not fetch record for element {element.name} via keys {keys}, ",
865 "but it is marked alwaysJoin=True; this means one or more dimensions are not "
866 "related.",
867 )
868 for d in element.implied:
869 keys.setdefault(d.name, None)
870 records.setdefault(d.name, None)
871 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records)
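# --- Editor's illustrative sketch; not part of the original sql.py ----------
# Expanding a minimal data ID so that implied dimension values and dimension
# records are attached; the key/value pairs are hypothetical.
data_id = registry.expandDataId(instrument="HSC", exposure=903334, detector=42)
assert data_id.hasRecords()
print(data_id.full)  # includes implied dimensions such as the band
# -----------------------------------------------------------------------------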
873 def insertDimensionData(
874 self,
875 element: Union[DimensionElement, str],
876 *data: Union[Mapping[str, Any], DimensionRecord],
877 conform: bool = True,
878 replace: bool = False,
879 skip_existing: bool = False,
880 ) -> None:
881 # Docstring inherited from lsst.daf.butler.registry.Registry
882 if conform:
883 if isinstance(element, str):
884 element = self.dimensions[element]
885 records = [
886 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data
887 ]
888 else:
889 # Ignore typing since caller said to trust them with conform=False.
890 records = data # type: ignore
891 storage = self._managers.dimensions[element]
892 storage.insert(*records, replace=replace, skip_existing=skip_existing)
894 def syncDimensionData(
895 self,
896 element: Union[DimensionElement, str],
897 row: Union[Mapping[str, Any], DimensionRecord],
898 conform: bool = True,
899 update: bool = False,
900 ) -> Union[bool, Dict[str, Any]]:
901 # Docstring inherited from lsst.daf.butler.registry.Registry
902 if conform:
903 if isinstance(element, str):
904 element = self.dimensions[element]
905 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row)
906 else:
907 # Ignore typing since caller said to trust them with conform=False.
908 record = row # type: ignore
909 storage = self._managers.dimensions[element]
910 return storage.sync(record, update=update)
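# --- Editor's illustrative sketch; not part of the original sql.py ----------
# Inserting and syncing dimension records.  The record fields shown are
# assumptions and depend on the dimension universe in use.
registry.insertDimensionData(
    "instrument",
    {"name": "DemoCam", "visit_max": 999999, "exposure_max": 999999,
     "detector_max": 4, "class_name": "lsst.demo.DemoCam"},
)
registry.syncDimensionData(
    "physical_filter",
    {"instrument": "DemoCam", "name": "demo-r", "band": "r"},
    update=True,
)
# -----------------------------------------------------------------------------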
912 def queryDatasetTypes(
913 self,
914 expression: Any = ...,
915 *,
916 components: Optional[bool] = None,
917 missing: Optional[List[str]] = None,
918 ) -> Iterable[DatasetType]:
919 # Docstring inherited from lsst.daf.butler.registry.Registry
920 wildcard = DatasetTypeWildcard.from_expression(expression)
921 composition_dict = self._managers.datasets.resolve_wildcard(
922 wildcard,
923 components=components,
924 missing=missing,
925 )
926 result: list[DatasetType] = []
927 for parent_dataset_type, components_for_parent in composition_dict.items():
928 result.extend(
929 parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type
930 for c in components_for_parent
931 )
932 return result
934 def queryCollections(
935 self,
936 expression: Any = ...,
937 datasetType: Optional[DatasetType] = None,
938 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(),
939 flattenChains: bool = False,
940 includeChains: Optional[bool] = None,
941 ) -> Sequence[str]:
942 # Docstring inherited from lsst.daf.butler.registry.Registry
944 # Right now the datasetType argument is completely ignored, but that
945 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up
946 # ticket will take care of that.
947 try:
948 wildcard = CollectionWildcard.from_expression(expression)
949 except TypeError as exc:
950 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc
951 collectionTypes = ensure_iterable(collectionTypes)
952 return [
953 record.name
954 for record in self._managers.collections.resolve_wildcard(
955 wildcard,
956 collection_types=frozenset(collectionTypes),
957 flatten_chains=flattenChains,
958 include_chains=includeChains,
959 )
960 ]
962 def _makeQueryBuilder(
963 self,
964 summary: queries.QuerySummary,
965 doomed_by: Iterable[str] = (),
966 ) -> queries.QueryBuilder:
967 """Return a `QueryBuilder` instance capable of constructing and
968 managing more complex queries than those obtainable via `Registry`
969 interfaces.
971 This is an advanced interface; downstream code should prefer
972 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
973 are sufficient.
975 Parameters
976 ----------
977 summary : `queries.QuerySummary`
978 Object describing and categorizing the full set of dimensions that
979 will be included in the query.
980 doomed_by : `Iterable` of `str`, optional
981 A list of diagnostic messages that indicate why the query is going
982 to yield no results and should not even be executed. If an empty
983 container (default) the query will be executed unless other code
984 determines that it is doomed.
986 Returns
987 -------
988 builder : `queries.QueryBuilder`
989 Object that can be used to construct and perform advanced queries.
990 """
991 doomed_by = list(doomed_by)
992 backend = queries.SqlQueryBackend(self._db, self._managers)
993 context = backend.context()
994 relation: Relation | None = None
995 if doomed_by:
996 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by)
997 return queries.QueryBuilder(
998 summary,
999 backend=backend,
1000 context=context,
1001 relation=relation,
1002 )
1004 def _standardize_query_data_id_args(
1005 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any
1006 ) -> DataCoordinate:
1007 """Preprocess the data ID arguments passed to query* methods.
1009 Parameters
1010 ----------
1011 data_id : `DataId` or `None`
1012 Data ID that constrains the query results.
1013 doomed_by : `list` [ `str` ]
1014 List to append messages indicating why the query is doomed to
1015 yield no results.
1016 **kwargs
1017 Additional data ID key-value pairs, extending and overriding
1018 ``data_id``.
1020 Returns
1021 -------
1022 data_id : `DataCoordinate`
1023 Standardized data ID. Will be fully expanded unless expansion
1024 fails, in which case a message will be appended to ``doomed_by``
1025 on return.
1026 """
1027 try:
1028 return self.expandDataId(data_id, **kwargs)
1029 except DataIdValueError as err:
1030 doomed_by.append(str(err))
1031 return DataCoordinate.standardize(
1032 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId
1033 )
1035 def _standardize_query_dataset_args(
1036 self,
1037 datasets: Any,
1038 collections: Any,
1039 components: bool | None,
1040 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain",
1041 *,
1042 doomed_by: list[str],
1043 ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]:
1044 """Preprocess dataset arguments passed to query* methods.
1046 Parameters
1047 ----------
1048 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these
1049 Expression identifying dataset types. See `queryDatasetTypes` for
1050 details.
1051 collections : `str`, `re.Pattern`, or iterable of these
1052 Expression identifying collections to be searched. See
1053 `queryCollections` for details.
1054 components : `bool`, optional
1055 If `True`, apply all expression patterns to component dataset type
1056 names as well. If `False`, never apply patterns to components.
1057 If `None` (default), apply patterns to components only if their
1058 parent datasets were not matched by the expression.
1059 Fully-specified component datasets (`str` or `DatasetType`
1060 instances) are always included.
1062 Values other than `False` are deprecated, and only `False` will be
1063 supported after v26. After v27 this argument will be removed
1064 entirely.
1065 mode : `str`, optional
1066 The way in which datasets are being used in this query; one of:
1068 - "find_first": this is a query for the first dataset in an
1069 ordered list of collections. Prohibits collection wildcards,
1070 but permits dataset type wildcards.
1072 - "find_all": this is a query for all datasets in all matched
1073 collections. Permits collection and dataset type wildcards.
1075 - "constrain": this is a query for something other than datasets,
1076 with results constrained by dataset existence. Permits
1077 collection wildcards and prohibits ``...`` as a dataset type
1078 wildcard.
1079 doomed_by : `list` [ `str` ]
1080 List to append messages indicating why the query is doomed to
1081 yield no results.
1083 Returns
1084 -------
1085 composition : `dict` [ `DatasetType`, `list` [ `str` or `None` ] ]
1086 Dictionary mapping parent dataset type to `list` of components
1087 matched for that dataset type (or `None` for the parent itself).
1088 collections : `CollectionWildcard`
1089 Processed collection expression.
1090 """
1091 composition: dict[DatasetType, list[str | None]] = {}
1092 if datasets is not None:
1093 if not collections:
1094 if not self.defaults.collections:
1095 raise NoDefaultCollectionError("No collections, and no registry default collections.")
1096 collections = self.defaults.collections
1097 else:
1098 collections = CollectionWildcard.from_expression(collections)
1099 if mode == "find_first" and collections.patterns:
1100 raise TypeError(
1101 f"Collection pattern(s) {collections.patterns} not allowed in this context."
1102 )
1103 missing: list[str] = []
1104 composition = self._managers.datasets.resolve_wildcard(
1105 datasets, components=components, missing=missing, explicit_only=(mode == "constrain")
1106 )
1107 if missing and mode == "constrain":
1108 # After v26 this should raise MissingDatasetTypeError, to be
1109 # implemented on DM-36303.
1110 warnings.warn(
1111 f"Dataset type(s) {missing} are not registered; this will be an error after v26.",
1112 FutureWarning,
1113 )
1114 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing)
1115 elif collections:
1116 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.")
1117 return composition, collections
1119 def queryDatasets(
1120 self,
1121 datasetType: Any,
1122 *,
1123 collections: Any = None,
1124 dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
1125 dataId: Optional[DataId] = None,
1126 where: str = "",
1127 findFirst: bool = False,
1128 components: Optional[bool] = None,
1129 bind: Optional[Mapping[str, Any]] = None,
1130 check: bool = True,
1131 **kwargs: Any,
1132 ) -> queries.DatasetQueryResults:
1133 # Docstring inherited from lsst.daf.butler.registry.Registry
1134 doomed_by: list[str] = []
1135 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1136 dataset_composition, collections = self._standardize_query_dataset_args(
1137 datasetType,
1138 collections,
1139 components,
1140 mode="find_first" if findFirst else "find_all",
1141 doomed_by=doomed_by,
1142 )
1143 parent_results: list[queries.ParentDatasetQueryResults] = []
1144 for parent_dataset_type, components_for_parent in dataset_composition.items():
1145 # The full set of dimensions in the query is the combination of
1146 # those needed for the DatasetType and those explicitly requested,
1147 # if any.
1148 dimension_names = set(parent_dataset_type.dimensions.names)
1149 if dimensions is not None:
1150 dimension_names.update(self.dimensions.extract(dimensions).names)
1151 # Construct the summary structure needed to construct a
1152 # QueryBuilder.
1153 summary = queries.QuerySummary(
1154 requested=DimensionGraph(self.dimensions, names=dimension_names),
1155 data_id=data_id,
1156 expression=where,
1157 bind=bind,
1158 defaults=self.defaults.dataId,
1159 check=check,
1160 datasets=[parent_dataset_type],
1161 )
1162 builder = self._makeQueryBuilder(summary)
1163 # Add the dataset subquery to the query, telling the QueryBuilder
1164 # to include the rank of the selected collection in the results
1165 # only if we need to findFirst. Note that if any of the
1166 # collections are actually wildcard expressions, and
1167 # findFirst=True, this will raise TypeError for us.
1168 builder.joinDataset(parent_dataset_type, collections, isResult=True, findFirst=findFirst)
1169 query = builder.finish()
1170 parent_results.append(
1171 queries.ParentDatasetQueryResults(
1172 query, parent_dataset_type, components=components_for_parent
1173 )
1174 )
1175 if not parent_results:
1176 doomed_by.extend(
1177 f"No registered dataset type matching {t!r} found, so no matching datasets can "
1178 "exist in any collection."
1179 for t in ensure_iterable(datasetType)
1180 )
1181 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
1182 elif len(parent_results) == 1:
1183 return parent_results[0]
1184 else:
1185 return queries.ChainedDatasetQueryResults(parent_results)
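# --- Editor's illustrative sketch; not part of the original sql.py ----------
# Querying datasets with a ``where`` expression and a bind value; the dataset
# type, collection, and expression contents are hypothetical.  findFirst=True
# requires an ordered, wildcard-free collection list.
for ref in registry.queryDatasets(
    "calexp",
    collections=["u/demo/run1"],
    where="instrument = 'HSC' AND visit > min_visit",
    bind={"min_visit": 903000},
    findFirst=True,
):
    print(ref.dataId, ref.run)
# -----------------------------------------------------------------------------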
1187 def queryDataIds(
1188 self,
1189 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str],
1190 *,
1191 dataId: Optional[DataId] = None,
1192 datasets: Any = None,
1193 collections: Any = None,
1194 where: str = "",
1195 components: Optional[bool] = None,
1196 bind: Optional[Mapping[str, Any]] = None,
1197 check: bool = True,
1198 **kwargs: Any,
1199 ) -> queries.DataCoordinateQueryResults:
1200 # Docstring inherited from lsst.daf.butler.registry.Registry
1201 dimensions = ensure_iterable(dimensions)
1202 requestedDimensions = self.dimensions.extract(dimensions)
1203 doomed_by: list[str] = []
1204 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1205 dataset_composition, collections = self._standardize_query_dataset_args(
1206 datasets, collections, components, doomed_by=doomed_by
1207 )
1208 summary = queries.QuerySummary(
1209 requested=requestedDimensions,
1210 data_id=data_id,
1211 expression=where,
1212 bind=bind,
1213 defaults=self.defaults.dataId,
1214 check=check,
1215 datasets=dataset_composition.keys(),
1216 )
1217 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
1218 for datasetType in dataset_composition.keys():
1219 builder.joinDataset(datasetType, collections, isResult=False)
1220 query = builder.finish()
1222 return queries.DataCoordinateQueryResults(query)
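# --- Editor's illustrative sketch; not part of the original sql.py ----------
# Querying data IDs constrained by dataset existence; the dataset type,
# collection, and data ID values are hypothetical.
data_ids = registry.queryDataIds(
    ["visit", "detector"],
    datasets="raw",
    collections=["HSC/raw/all"],
    instrument="HSC",
)
for data_id in data_ids:
    print(data_id)
# -----------------------------------------------------------------------------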
1224 def queryDimensionRecords(
1225 self,
1226 element: Union[DimensionElement, str],
1227 *,
1228 dataId: Optional[DataId] = None,
1229 datasets: Any = None,
1230 collections: Any = None,
1231 where: str = "",
1232 components: Optional[bool] = None,
1233 bind: Optional[Mapping[str, Any]] = None,
1234 check: bool = True,
1235 **kwargs: Any,
1236 ) -> queries.DimensionRecordQueryResults:
1237 # Docstring inherited from lsst.daf.butler.registry.Registry
1238 if not isinstance(element, DimensionElement):
1239 try:
1240 element = self.dimensions[element]
1241 except KeyError as e:
1242 raise DimensionNameError(
1243 f"No such dimension '{element}', available dimensions: "
1244 + str(self.dimensions.getStaticElements())
1245 ) from e
1246 doomed_by: list[str] = []
1247 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1248 dataset_composition, collections = self._standardize_query_dataset_args(
1249 datasets, collections, components, doomed_by=doomed_by
1250 )
1251 summary = queries.QuerySummary(
1252 requested=element.graph,
1253 data_id=data_id,
1254 expression=where,
1255 bind=bind,
1256 defaults=self.defaults.dataId,
1257 check=check,
1258 datasets=dataset_composition.keys(),
1259 )
1260 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
1261 for datasetType in dataset_composition.keys():
1262 builder.joinDataset(datasetType, collections, isResult=False)
1263 query = builder.finish().with_record_columns(element)
1264 return queries.DatabaseDimensionRecordQueryResults(query, element)
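# --- Editor's illustrative sketch; not part of the original sql.py ----------
# Fetching dimension records for one element; the element name and data ID
# constraint are hypothetical.
for record in registry.queryDimensionRecords("detector", instrument="HSC"):
    print(record)
# -----------------------------------------------------------------------------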
1266 def queryDatasetAssociations(
1267 self,
1268 datasetType: Union[str, DatasetType],
1269 collections: Any = ...,
1270 *,
1271 collectionTypes: Iterable[CollectionType] = CollectionType.all(),
1272 flattenChains: bool = False,
1273 ) -> Iterator[DatasetAssociation]:
1274 # Docstring inherited from lsst.daf.butler.registry.Registry
1275 if collections is None:
1276 if not self.defaults.collections:
1277 raise NoDefaultCollectionError(
1278 "No collections provided to queryDatasetAssociations, "
1279 "and no defaults from registry construction."
1280 )
1281 collections = self.defaults.collections
1282 collections = CollectionWildcard.from_expression(collections)
1283 backend = queries.SqlQueryBackend(self._db, self._managers)
1284 parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False)
1285 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan")
1286 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
1287 for parent_collection_record in backend.resolve_collection_wildcard(
1288 collections,
1289 collection_types=frozenset(collectionTypes),
1290 flatten_chains=flattenChains,
1291 ):
1292 # Resolve this possibly-chained collection into a list of
1293 # non-CHAINED collections that actually hold datasets of this
1294 # type.
1295 candidate_collection_records = backend.resolve_dataset_collections(
1296 parent_dataset_type,
1297 CollectionWildcard.from_names([parent_collection_record.name]),
1298 allow_calibration_collections=True,
1299 governor_constraints={},
1300 )
1301 if not candidate_collection_records:
1302 continue
1303 with backend.context() as context:
1304 relation = backend.make_dataset_query_relation(
1305 parent_dataset_type,
1306 candidate_collection_records,
1307 columns={"dataset_id", "run", "timespan", "collection"},
1308 context=context,
1309 )
1310 reader = queries.DatasetRefReader(
1311 parent_dataset_type,
1312 translate_collection=lambda k: self._managers.collections[k].name,
1313 full=False,
1314 )
1315 for row in context.fetch_iterable(relation):
1316 ref = reader.read(row)
1317 collection_record = self._managers.collections[row[collection_tag]]
1318 if collection_record.type is CollectionType.CALIBRATION:
1319 timespan = row[timespan_tag]
1320 else:
1321 # For backwards compatibility and (possibly?) user
1322 # convenience we continue to define the timespan of a
1323 # DatasetAssociation row for a non-CALIBRATION
1324 # collection to be None rather than a fully unbounded
1325 # timespan.
1326 timespan = None
1327 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan)
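# --- Editor's illustrative sketch; not part of the original sql.py ----------
# Listing collection membership (and calibration validity ranges) for one
# dataset type; the dataset type and collection names are hypothetical.
for assoc in registry.queryDatasetAssociations("bias", collections=["HSC/calib"]):
    print(assoc.ref, assoc.collection, assoc.timespan)
# -----------------------------------------------------------------------------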
1329 @property
1330 def obsCoreTableManager(self) -> ObsCoreTableManager | None:
1331 # Docstring inherited from lsst.daf.butler.registry.Registry
1332 return self._managers.obscore
1334 storageClasses: StorageClassFactory
1335 """All storage classes known to the registry (`StorageClassFactory`).
1336 """