Coverage for python/lsst/daf/butler/registries/sql.py: 13%
503 statements

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("SqlRegistry",)

import contextlib
import logging
import warnings
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Iterable,
    Iterator,
    List,
    Literal,
    Mapping,
    Optional,
    Sequence,
    Set,
    Union,
    cast,
)

import sqlalchemy
from lsst.daf.relation import LeafRelation, Relation
from lsst.resources import ResourcePathExpression
from lsst.utils.iteration import ensure_iterable

from ..core import (
    Config,
    DataCoordinate,
    DataId,
    DatasetAssociation,
    DatasetColumnTag,
    DatasetId,
    DatasetIdFactory,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionConfig,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    NamedKeyMapping,
    NameLookupMapping,
    Progress,
    StorageClassFactory,
    Timespan,
    ddl,
)
from ..core.utils import transactional
from ..registry import (
    ArgumentError,
    CollectionExpressionError,
    CollectionSummary,
    CollectionType,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    DimensionNameError,
    InconsistentDataIdError,
    NoDefaultCollectionError,
    OrphanedRecordError,
    Registry,
    RegistryConfig,
    RegistryDefaults,
    queries,
)
from ..registry.interfaces import ChainedCollectionRecord, RunRecord
from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes
from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard

if TYPE_CHECKING:
    from .._butlerConfig import ButlerConfig
    from ..registry.interfaces import (
        CollectionRecord,
        Database,
        DatastoreRegistryBridgeManager,
        ObsCoreTableManager,
    )

_LOG = logging.getLogger(__name__)


class SqlRegistry(Registry):
    """Registry implementation based on SQLAlchemy.

    Parameters
    ----------
    database : `Database`
        Database instance to store Registry.
    defaults : `RegistryDefaults`
        Default collection search path and/or output `~CollectionType.RUN`
        collection.
    managers : `RegistryManagerInstances`
        All the managers required for this registry.
    """

    defaultConfigFile: Optional[str] = None
    """Path to configuration defaults. Accessed within the ``configs``
    resource or relative to a search path. Can be `None` if no defaults are
    specified.
    """

    @classmethod
    def createFromConfig(
        cls,
        config: Optional[Union[RegistryConfig, str]] = None,
        dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ) -> Registry:
136 """Create registry database and return `SqlRegistry` instance.
138 This method initializes database contents, database must be empty
139 prior to calling this method.
141 Parameters
142 ----------
143 config : `RegistryConfig` or `str`, optional
144 Registry configuration, if missing then default configuration will
145 be loaded from registry.yaml.
146 dimensionConfig : `DimensionConfig` or `str`, optional
147 Dimensions configuration, if missing then default configuration
148 will be loaded from dimensions.yaml.
149 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
150 Path to the repository root this `SqlRegistry` will manage.
152 Returns
153 -------
154 registry : `SqlRegistry`
155 A new `SqlRegistry` instance.
156 """
        config = cls.forceRegistryConfig(config)
        config.replaceRoot(butlerRoot)

        if isinstance(dimensionConfig, str):
            dimensionConfig = DimensionConfig(dimensionConfig)
        elif dimensionConfig is None:
            dimensionConfig = DimensionConfig()
        elif not isinstance(dimensionConfig, DimensionConfig):
            raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}")

        DatabaseClass = config.getDatabaseClass()
        database = DatabaseClass.fromUri(
            str(config.connectionString), origin=config.get("origin", 0), namespace=config.get("namespace")
        )
        managerTypes = RegistryManagerTypes.fromConfig(config)
        managers = managerTypes.makeRepo(database, dimensionConfig)
        return cls(database, RegistryDefaults(), managers)
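
    # Illustrative sketch (not part of the original source): creating a brand
    # new registry database from configuration. The file names below are
    # hypothetical; any configuration pointing at an empty database would do.
    #
    #     from lsst.daf.butler.registry import RegistryConfig
    #
    #     config = RegistryConfig("registry.yaml")  # hypothetical config file
    #     registry = SqlRegistry.createFromConfig(config, dimensionConfig="dimensions.yaml")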

    @classmethod
    def fromConfig(
        cls,
        config: Union[ButlerConfig, RegistryConfig, Config, str],
        butlerRoot: Optional[ResourcePathExpression] = None,
        writeable: bool = True,
        defaults: Optional[RegistryDefaults] = None,
    ) -> Registry:
183 """Create `Registry` subclass instance from `config`.
185 Registry database must be initialized prior to calling this method.
187 Parameters
188 ----------
189 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
190 Registry configuration
191 butlerRoot : `lsst.resources.ResourcePathExpression`, optional
192 Path to the repository root this `Registry` will manage.
193 writeable : `bool`, optional
194 If `True` (default) create a read-write connection to the database.
195 defaults : `RegistryDefaults`, optional
196 Default collection search path and/or output `~CollectionType.RUN`
197 collection.
199 Returns
200 -------
201 registry : `SqlRegistry` (subclass)
202 A new `SqlRegistry` subclass instance.
203 """
        config = cls.forceRegistryConfig(config)
        config.replaceRoot(butlerRoot)
        DatabaseClass = config.getDatabaseClass()
        database = DatabaseClass.fromUri(
            config.connectionString.render_as_string(hide_password=False),
            origin=config.get("origin", 0),
            namespace=config.get("namespace"),
            writeable=writeable,
        )
        managerTypes = RegistryManagerTypes.fromConfig(config)
        with database.session():
            managers = managerTypes.loadRepo(database)
        if defaults is None:
            defaults = RegistryDefaults()
        return cls(database, defaults, managers)
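
    # Illustrative sketch (not part of the original source): connecting to an
    # already-initialized repository, read-only. The path is hypothetical.
    #
    #     config = RegistryConfig("/repo/registry.yaml")  # hypothetical path
    #     registry = SqlRegistry.fromConfig(config, writeable=False)
    #     assert not registry.isWriteable()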

    def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances):
        self._db = database
        self._managers = managers
        self.storageClasses = StorageClassFactory()
        # Intentionally invoke property setter to initialize defaults. This
        # can only be done after most of the rest of Registry has already been
        # initialized, and must be done before the property getter is used.
        self.defaults = defaults
        # In the future DatasetIdFactory may become configurable and this
        # instance will need to be shared with datasets manager.
        self.datasetIdFactory = DatasetIdFactory()

    def __str__(self) -> str:
        return str(self._db)

    def __repr__(self) -> str:
        return f"SqlRegistry({self._db!r}, {self.dimensions!r})"

    def isWriteable(self) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._db.isWriteable()

    def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if defaults is None:
            # No need to copy, because `RegistryDefaults` is immutable; we
            # effectively copy on write.
            defaults = self.defaults
        return type(self)(self._db, defaults, self._managers)

    @property
    def dimensions(self) -> DimensionUniverse:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.dimensions.universe

    def refresh(self) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        with self._db.transaction():
            self._managers.refresh()

    @contextlib.contextmanager
    def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        try:
            with self._db.transaction(savepoint=savepoint):
                yield
        except BaseException:
            # TODO: this clears the caches sometimes when we wouldn't actually
            # need to. Can we avoid that?
            self._managers.dimensions.clearCaches()
            raise

    def resetConnectionPool(self) -> None:
        """Reset the SQLAlchemy connection pool for the `SqlRegistry` database.

        This operation is useful when using the registry with fork-based
        multiprocessing. To use the registry across a fork boundary, one has
        to make sure that no connections are currently active (no session or
        transaction is in progress) and that the connection pool is reset
        using this method. This method should be called by the child process
        immediately after the fork.
        """
        self._db._engine.dispose()
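
    # Illustrative sketch (not part of the original source) of the fork-based
    # multiprocessing pattern described in the docstring above: the child
    # process resets the pool before touching the registry. ``registry`` and
    # ``work`` are hypothetical.
    #
    #     import multiprocessing
    #
    #     def _child(registry: SqlRegistry) -> None:
    #         registry.resetConnectionPool()  # first thing after the fork
    #         work(registry)                  # hypothetical workload
    #
    #     ctx = multiprocessing.get_context("fork")
    #     ctx.Process(target=_child, args=(registry,)).start()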

    def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
        """Add an opaque (to the `Registry`) table for use by a `Datastore` or
        other data repository client.

        Opaque table records can be added via `insertOpaqueData`, retrieved
        via `fetchOpaqueData`, and removed via `deleteOpaqueData`.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. This may differ from the
            actual name used in the database by a prefix and/or suffix.
        spec : `ddl.TableSpec`
            Specification for the table to be added.
        """
        self._managers.opaque.register(tableName, spec)

    @transactional
    def insertOpaqueData(self, tableName: str, *data: dict) -> None:
        """Insert records into an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        data
            Each additional positional argument is a dictionary that
            represents a single row to be added.
        """
        self._managers.opaque[tableName].insert(*data)

    def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]:
        """Retrieve records from an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        where
            Additional keyword arguments are interpreted as equality
            constraints that restrict the returned rows (combined with AND);
            keyword arguments are column names and values are the values they
            must have.

        Yields
        ------
        row : `dict`
            A dictionary representing a single result row.
        """
        yield from self._managers.opaque[tableName].fetch(**where)

    @transactional
    def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
        """Remove records from an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        where
            Additional keyword arguments are interpreted as equality
            constraints that restrict the deleted rows (combined with AND);
            keyword arguments are column names and values are the values they
            must have.
        """
        self._managers.opaque[tableName].delete(where.keys(), where)
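
    # Illustrative sketch (not part of the original source): a round trip
    # through the opaque-table API defined above. The table name, columns,
    # and values are hypothetical.
    #
    #     spec = ddl.TableSpec(
    #         fields=[
    #             ddl.FieldSpec("dataset_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
    #             ddl.FieldSpec("path", dtype=sqlalchemy.String, length=256),
    #         ]
    #     )
    #     registry.registerOpaqueTable("datastore_records", spec)
    #     registry.insertOpaqueData("datastore_records", {"dataset_id": 1, "path": "a/b.fits"})
    #     rows = list(registry.fetchOpaqueData("datastore_records", dataset_id=1))
    #     registry.deleteOpaqueData("datastore_records", dataset_id=1)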

    def registerCollection(
        self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None
    ) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, registered = self._managers.collections.register(name, type, doc=doc)
        return registered

    def getCollectionType(self, name: str) -> CollectionType:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.find(name).type

    def _get_collection_record(self, name: str) -> CollectionRecord:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.find(name)

    def registerRun(self, name: str, doc: Optional[str] = None) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc)
        return registered

    @transactional
    def removeCollection(self, name: str) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.collections.remove(name)

    def getCollectionChain(self, parent: str) -> tuple[str, ...]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(parent)
        if record.type is not CollectionType.CHAINED:
            raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
        assert isinstance(record, ChainedCollectionRecord)
        return record.children

    @transactional
    def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(parent)
        if record.type is not CollectionType.CHAINED:
            raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
        assert isinstance(record, ChainedCollectionRecord)
        children = CollectionWildcard.from_expression(children).require_ordered()
        if children != record.children or flatten:
            record.update(self._managers.collections, children, flatten=flatten)
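
    # Illustrative sketch (not part of the original source): registering
    # collections and chaining them. The collection names are hypothetical.
    #
    #     registry.registerRun("u/someone/run1")
    #     registry.registerCollection("u/someone/tagged", CollectionType.TAGGED)
    #     registry.registerCollection("u/someone/chain", CollectionType.CHAINED)
    #     registry.setCollectionChain("u/someone/chain", ["u/someone/run1", "u/someone/tagged"])
    #     assert registry.getCollectionChain("u/someone/chain") == ("u/someone/run1", "u/someone/tagged")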

    def getCollectionParentChains(self, collection: str) -> Set[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return {
            record.name
            for record in self._managers.collections.getParentChains(
                self._managers.collections.find(collection).key
            )
        }

    def getCollectionDocumentation(self, collection: str) -> Optional[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key)

    def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc)

    def getCollectionSummary(self, collection: str) -> CollectionSummary:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(collection)
        return self._managers.datasets.getCollectionSummary(record)

    def registerDatasetType(self, datasetType: DatasetType) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, inserted = self._managers.datasets.register(datasetType)
        return inserted

    def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry

        for datasetTypeExpression in ensure_iterable(name):
            datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression))
            if not datasetTypes:
                _LOG.info("Dataset type %r not defined", datasetTypeExpression)
            else:
                for datasetType in datasetTypes:
                    self._managers.datasets.remove(datasetType.name)
                    _LOG.info("Removed dataset type %r", datasetType.name)

    def getDatasetType(self, name: str) -> DatasetType:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        parent_name, component = DatasetType.splitDatasetTypeName(name)
        storage = self._managers.datasets[parent_name]
        if component is None:
            return storage.datasetType
        else:
            return storage.datasetType.makeComponentDatasetType(component)

    def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datasets.supportsIdGenerationMode(mode)

    def findDataset(
        self,
        datasetType: Union[DatasetType, str],
        dataId: Optional[DataId] = None,
        *,
        collections: Any = None,
        timespan: Optional[Timespan] = None,
        **kwargs: Any,
    ) -> Optional[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if collections is None:
            if not self.defaults.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to findDataset, and no defaults from registry construction."
                )
            collections = self.defaults.collections
        backend = queries.SqlQueryBackend(self._db, self._managers)
        collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True)
        matched_collections = backend.resolve_collection_wildcard(collection_wildcard)
        parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard(
            datasetType, components_deprecated=False
        )
        if len(components) > 1:
            raise DatasetTypeError(
                f"findDataset requires exactly one dataset type; got multiple components {components} "
                f"for parent dataset type {parent_dataset_type.name}."
            )
        component = components[0]
        dataId = DataCoordinate.standardize(
            dataId,
            graph=parent_dataset_type.dimensions,
            universe=self.dimensions,
            defaults=self.defaults.dataId,
            **kwargs,
        )
        governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names}
        (filtered_collections,) = backend.filter_dataset_collections(
            [parent_dataset_type],
            matched_collections,
            governor_constraints=governor_constraints,
        ).values()
        if not filtered_collections:
            return None
        if timespan is None:
            filtered_collections = [
                collection_record
                for collection_record in filtered_collections
                if collection_record.type is not CollectionType.CALIBRATION
            ]
        if filtered_collections:
            requested_columns = {"dataset_id", "run", "collection"}
            with backend.context() as context:
                predicate = context.make_data_coordinate_predicate(
                    dataId.subset(parent_dataset_type.dimensions), full=False
                )
                if timespan is not None:
                    requested_columns.add("timespan")
                    predicate = predicate.logical_and(
                        context.make_timespan_overlap_predicate(
                            DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan
                        )
                    )
                relation = backend.make_dataset_query_relation(
                    parent_dataset_type, filtered_collections, requested_columns, context
                ).with_rows_satisfying(predicate)
                rows = list(context.fetch_iterable(relation))
        else:
            rows = []
        if not rows:
            return None
        elif len(rows) == 1:
            best_row = rows[0]
        else:
            rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)}
            collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
            row_iter = iter(rows)
            best_row = next(row_iter)
            best_rank = rank_by_collection_key[best_row[collection_tag]]
            have_tie = False
            for row in row_iter:
                if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank:
                    best_row = row
                    best_rank = rank
                    have_tie = False
                elif rank == best_rank:
                    have_tie = True
                    assert timespan is not None, "Rank ties should be impossible given DB constraints."
            if have_tie:
                raise LookupError(
                    f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections "
                    f"{collection_wildcard.strings} with timespan {timespan}."
                )
        reader = queries.DatasetRefReader(
            parent_dataset_type,
            translate_collection=lambda k: self._managers.collections[k].name,
        )
        ref = reader.read(best_row, data_id=dataId)
        if component is not None:
            ref = ref.makeComponentRef(component)
        return ref
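
    # Illustrative sketch (not part of the original source): a typical
    # findDataset call. The dataset type, data ID keys, and collection name
    # are hypothetical.
    #
    #     ref = registry.findDataset(
    #         "calexp",
    #         instrument="HSC",
    #         visit=903334,
    #         detector=16,
    #         collections="HSC/runs/RC2",
    #     )
    #     if ref is None:
    #         ...  # no matching dataset in the searched collections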

    @transactional
    def insertDatasets(
        self,
        datasetType: Union[DatasetType, str],
        dataIds: Iterable[DataId],
        run: Optional[str] = None,
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> List[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if isinstance(datasetType, DatasetType):
            storage = self._managers.datasets.find(datasetType.name)
            if storage is None:
                raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
        else:
            storage = self._managers.datasets.find(datasetType)
            if storage is None:
                raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.")
        if run is None:
            if self.defaults.run is None:
                raise NoDefaultCollectionError(
                    "No run provided to insertDatasets, and no default from registry construction."
                )
            run = self.defaults.run
        runRecord = self._managers.collections.find(run)
        if runRecord.type is not CollectionType.RUN:
            raise CollectionTypeError(
                f"Given collection is of type {runRecord.type.name}; RUN collection required."
            )
        assert isinstance(runRecord, RunRecord)
        progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
        if expand:
            expandedDataIds = [
                self.expandDataId(dataId, graph=storage.datasetType.dimensions)
                for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs")
            ]
        else:
            expandedDataIds = [
                DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds
            ]
        try:
            refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode))
            if self._managers.obscore:
                context = queries.SqlQueryContext(self._db, self._managers.column_types)
                self._managers.obscore.add_datasets(refs, context)
        except sqlalchemy.exc.IntegrityError as err:
            raise ConflictingDefinitionError(
                "A database constraint failure was triggered by inserting "
                f"one or more datasets of type {storage.datasetType} into "
                f"collection '{run}'. "
                "This probably means a dataset with the same data ID "
                "and dataset type already exists, but it may also mean a "
                "dimension row is missing."
            ) from err
        return refs
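
    # Illustrative sketch (not part of the original source): inserting new
    # datasets into a RUN collection. The dataset type, data ID, and run name
    # are hypothetical; the dataset type must already be registered.
    #
    #     refs = registry.insertDatasets(
    #         "raw",
    #         dataIds=[{"instrument": "HSC", "exposure": 903334, "detector": 16}],
    #         run="HSC/raw/all",
    #     )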

    @transactional
    def _importDatasets(
        self,
        datasets: Iterable[DatasetRef],
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
        reuseIds: bool = False,
    ) -> List[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        datasets = list(datasets)
        if not datasets:
            # nothing to do
            return []

        # find dataset type
        datasetTypes = set(dataset.datasetType for dataset in datasets)
        if len(datasetTypes) != 1:
            raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}")
        datasetType = datasetTypes.pop()

        # get storage handler for this dataset type
        storage = self._managers.datasets.find(datasetType.name)
        if storage is None:
            raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")

        # find run name
        runs = set(dataset.run for dataset in datasets)
        if len(runs) != 1:
            raise ValueError(f"Multiple run names in input datasets: {runs}")
        run = runs.pop()
        if run is None:
            if self.defaults.run is None:
                raise NoDefaultCollectionError(
                    "No run provided to ingestDatasets, and no default from registry construction."
                )
            run = self.defaults.run

        runRecord = self._managers.collections.find(run)
        if runRecord.type is not CollectionType.RUN:
            raise CollectionTypeError(
                f"Given collection '{runRecord.name}' is of type {runRecord.type.name};"
                " RUN collection required."
            )
        assert isinstance(runRecord, RunRecord)

        progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
        if expand:
            expandedDatasets = [
                dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions))
                for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs")
            ]
        else:
            expandedDatasets = [
                DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True)
                for dataset in datasets
            ]

        try:
            refs = list(storage.import_(runRecord, expandedDatasets, idGenerationMode, reuseIds))
            if self._managers.obscore:
                context = queries.SqlQueryContext(self._db, self._managers.column_types)
                self._managers.obscore.add_datasets(refs, context)
        except sqlalchemy.exc.IntegrityError as err:
            raise ConflictingDefinitionError(
                "A database constraint failure was triggered by inserting "
                f"one or more datasets of type {storage.datasetType} into "
                f"collection '{run}'. "
                "This probably means a dataset with the same data ID "
                "and dataset type already exists, but it may also mean a "
                "dimension row is missing."
            ) from err
        return refs

    def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datasets.getDatasetRef(id)

    @transactional
    def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG)
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.groupByType(refs).items(), desc="Removing datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            try:
                storage.delete(refsForType)
            except sqlalchemy.exc.IntegrityError as err:
                raise OrphanedRecordError(
                    "One or more datasets is still present in one or more Datastores."
                ) from err

    @transactional
    def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        if collectionRecord.type is not CollectionType.TAGGED:
            raise CollectionTypeError(
                f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED."
            )
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.groupByType(refs).items(), desc="Associating datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            try:
                storage.associate(collectionRecord, refsForType)
                if self._managers.obscore:
                    # If a TAGGED collection is being monitored by ObsCore
                    # manager then we may need to save the dataset.
                    context = queries.SqlQueryContext(self._db, self._managers.column_types)
                    self._managers.obscore.associate(refsForType, collectionRecord, context)
            except sqlalchemy.exc.IntegrityError as err:
                raise ConflictingDefinitionError(
                    f"Constraint violation while associating dataset of type {datasetType.name} with "
                    f"collection {collection}. This probably means that one or more datasets with the same "
                    "dataset type and data ID already exist in the collection, but it may also indicate "
                    "that the datasets do not exist."
                ) from err

    @transactional
    def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        if collectionRecord.type is not CollectionType.TAGGED:
            raise CollectionTypeError(
                f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED."
            )
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            storage.disassociate(collectionRecord, refsForType)
            if self._managers.obscore:
                self._managers.obscore.disassociate(refsForType, collectionRecord)

    @transactional
    def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            storage.certify(
                collectionRecord,
                refsForType,
                timespan,
                context=queries.SqlQueryContext(self._db, self._managers.column_types),
            )

    @transactional
    def decertify(
        self,
        collection: str,
        datasetType: Union[str, DatasetType],
        timespan: Timespan,
        *,
        dataIds: Optional[Iterable[DataId]] = None,
    ) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        collectionRecord = self._managers.collections.find(collection)
        if isinstance(datasetType, str):
            storage = self._managers.datasets[datasetType]
        else:
            storage = self._managers.datasets[datasetType.name]
        standardizedDataIds = None
        if dataIds is not None:
            standardizedDataIds = [
                DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds
            ]
        storage.decertify(
            collectionRecord,
            timespan,
            dataIds=standardizedDataIds,
            context=queries.SqlQueryContext(self._db, self._managers.column_types),
        )

    def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
        """Return an object that allows a new `Datastore` instance to
        communicate with this `Registry`.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            Object that mediates communication between this `Registry` and its
            associated datastores.
        """
        return self._managers.datastores

    def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datastores.findDatastores(ref)

    def expandDataId(
        self,
        dataId: Optional[DataId] = None,
        *,
        graph: Optional[DimensionGraph] = None,
        records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
        withDefaults: bool = True,
        **kwargs: Any,
    ) -> DataCoordinate:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if not withDefaults:
            defaults = None
        else:
            defaults = self.defaults.dataId
        try:
            standardized = DataCoordinate.standardize(
                dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs
            )
        except KeyError as exc:
            # This means either kwargs have some odd name or required
            # dimension is missing.
            raise DimensionNameError(str(exc)) from exc
        if standardized.hasRecords():
            return standardized
        if records is None:
            records = {}
        elif isinstance(records, NamedKeyMapping):
            records = records.byName()
        else:
            records = dict(records)
        if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
            records.update(dataId.records.byName())
        keys = standardized.byName()
        context = queries.SqlQueryContext(self._db, self._managers.column_types)
        for element in standardized.graph.primaryKeyTraversalOrder:
            record = records.get(element.name, ...)  # Use ... to mean not found; None might mean NULL
            if record is ...:
                if isinstance(element, Dimension) and keys.get(element.name) is None:
                    if element in standardized.graph.required:
                        raise DimensionNameError(
                            f"No value or null value for required dimension {element.name}."
                        )
                    keys[element.name] = None
                    record = None
                else:
                    storage = self._managers.dimensions[element]
                    record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context)
                records[element.name] = record
            if record is not None:
                for d in element.implied:
                    value = getattr(record, d.name)
                    if keys.setdefault(d.name, value) != value:
                        raise InconsistentDataIdError(
                            f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
                            f"but {element.name} implies {d.name}={value!r}."
                        )
            else:
                if element in standardized.graph.required:
                    raise DataIdValueError(
                        f"Could not fetch record for required dimension {element.name} via keys {keys}."
                    )
                if element.alwaysJoin:
                    raise InconsistentDataIdError(
                        f"Could not fetch record for element {element.name} via keys {keys}, "
                        "but it is marked alwaysJoin=True; this means one or more dimensions are not "
                        "related."
                    )
                for d in element.implied:
                    keys.setdefault(d.name, None)
                    records.setdefault(d.name, None)
        return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records)
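
    # Illustrative sketch (not part of the original source): expanding a
    # minimal data ID so it carries its dimension records. The values are
    # hypothetical.
    #
    #     data_id = registry.expandDataId(instrument="HSC", visit=903334, detector=16)
    #     assert data_id.hasRecords()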

    def insertDimensionData(
        self,
        element: Union[DimensionElement, str],
        *data: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if conform:
            if isinstance(element, str):
                element = self.dimensions[element]
            records = [
                row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data
            ]
        else:
            # Ignore typing since caller said to trust them with conform=False.
            records = data  # type: ignore
        storage = self._managers.dimensions[element]
        storage.insert(*records, replace=replace, skip_existing=skip_existing)

    def syncDimensionData(
        self,
        element: Union[DimensionElement, str],
        row: Union[Mapping[str, Any], DimensionRecord],
        conform: bool = True,
        update: bool = False,
    ) -> Union[bool, Dict[str, Any]]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if conform:
            if isinstance(element, str):
                element = self.dimensions[element]
            record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row)
        else:
            # Ignore typing since caller said to trust them with conform=False.
            record = row  # type: ignore
        storage = self._managers.dimensions[element]
        return storage.sync(record, update=update)
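
    # Illustrative sketch (not part of the original source): inserting and
    # syncing dimension records. The element names and record values are
    # hypothetical.
    #
    #     registry.insertDimensionData("instrument", {"name": "DummyCam", "detector_max": 2})
    #     registry.syncDimensionData("detector", {"instrument": "DummyCam", "id": 1, "full_name": "one"})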

    def queryDatasetTypes(
        self,
        expression: Any = ...,
        *,
        components: Optional[bool] = None,
        missing: Optional[List[str]] = None,
    ) -> Iterable[DatasetType]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        wildcard = DatasetTypeWildcard.from_expression(expression)
        composition_dict = self._managers.datasets.resolve_wildcard(
            wildcard,
            components=components,
            missing=missing,
        )
        result: list[DatasetType] = []
        for parent_dataset_type, components_for_parent in composition_dict.items():
            result.extend(
                parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type
                for c in components_for_parent
            )
        return result

    def queryCollections(
        self,
        expression: Any = ...,
        datasetType: Optional[DatasetType] = None,
        collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
        includeChains: Optional[bool] = None,
    ) -> Sequence[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry

        # Right now the datasetTypes argument is completely ignored, but that
        # is consistent with its [lack of] guarantees. DM-24939 or a follow-up
        # ticket will take care of that.
        try:
            wildcard = CollectionWildcard.from_expression(expression)
        except TypeError as exc:
            raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc
        collectionTypes = ensure_iterable(collectionTypes)
        return [
            record.name
            for record in self._managers.collections.resolve_wildcard(
                wildcard,
                collection_types=frozenset(collectionTypes),
                flatten_chains=flattenChains,
                include_chains=includeChains,
            )
        ]

    def _makeQueryBuilder(
        self,
        summary: queries.QuerySummary,
        doomed_by: Iterable[str] = (),
    ) -> queries.QueryBuilder:
        """Return a `QueryBuilder` instance capable of constructing and
        managing more complex queries than those obtainable via `Registry`
        interfaces.

        This is an advanced interface; downstream code should prefer
        `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
        are sufficient.

        Parameters
        ----------
        summary : `queries.QuerySummary`
            Object describing and categorizing the full set of dimensions that
            will be included in the query.
        doomed_by : `Iterable` of `str`, optional
            A list of diagnostic messages that indicate why the query is going
            to yield no results and should not even be executed. If an empty
            container (default) the query will be executed unless other code
            determines that it is doomed.

        Returns
        -------
        builder : `queries.QueryBuilder`
            Object that can be used to construct and perform advanced queries.
        """
        doomed_by = list(doomed_by)
        backend = queries.SqlQueryBackend(self._db, self._managers)
        context = backend.context()
        relation: Relation | None = None
        if doomed_by:
            relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by)
        return queries.QueryBuilder(
            summary,
            backend=backend,
            context=context,
            relation=relation,
        )

    def _standardize_query_data_id_args(
        self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any
    ) -> DataCoordinate:
        """Preprocess the data ID arguments passed to query* methods.

        Parameters
        ----------
        data_id : `DataId` or `None`
            Data ID that constrains the query results.
        doomed_by : `list` [ `str` ]
            List to append messages indicating why the query is doomed to
            yield no results.
        **kwargs
            Additional data ID key-value pairs, extending and overriding
            ``data_id``.

        Returns
        -------
        data_id : `DataCoordinate`
            Standardized data ID. Will be fully expanded unless expansion
            fails, in which case a message will be appended to ``doomed_by``
            on return.
        """
        try:
            return self.expandDataId(data_id, **kwargs)
        except DataIdValueError as err:
            doomed_by.append(str(err))
        return DataCoordinate.standardize(
            data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId
        )

    def _standardize_query_dataset_args(
        self,
        datasets: Any,
        collections: Any,
        components: bool | None,
        mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain",
        *,
        doomed_by: list[str],
    ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]:
        """Preprocess dataset arguments passed to query* methods.

        Parameters
        ----------
        datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these
            Expression identifying dataset types. See `queryDatasetTypes` for
            details.
        collections : `str`, `re.Pattern`, or iterable of these
            Expression identifying collections to be searched. See
            `queryCollections` for details.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components.
            If `None` (default), apply patterns to components only if their
            parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

            Values other than `False` are deprecated, and only `False` will be
            supported after v26. After v27 this argument will be removed
            entirely.
        mode : `str`, optional
            The way in which datasets are being used in this query; one of:

            - "find_first": this is a query for the first dataset in an
              ordered list of collections. Prohibits collection wildcards,
              but permits dataset type wildcards.

            - "find_all": this is a query for all datasets in all matched
              collections. Permits collection and dataset type wildcards.

            - "constrain": this is a query for something other than datasets,
              with results constrained by dataset existence. Permits
              collection wildcards and prohibits ``...`` as a dataset type
              wildcard.
        doomed_by : `list` [ `str` ]
            List to append messages indicating why the query is doomed to
            yield no results.

        Returns
        -------
        composition : `defaultdict` [ `DatasetType`, `list` [ `str` ] ]
            Dictionary mapping parent dataset type to `list` of components
            matched for that dataset type (or `None` for the parent itself).
        collections : `CollectionWildcard`
            Processed collection expression.
        """
        composition: dict[DatasetType, list[str | None]] = {}
        if datasets is not None:
            if not collections:
                if not self.defaults.collections:
                    raise NoDefaultCollectionError("No collections, and no registry default collections.")
                collections = self.defaults.collections
            else:
                collections = CollectionWildcard.from_expression(collections)
                if mode == "find_first" and collections.patterns:
                    raise TypeError(
                        f"Collection pattern(s) {collections.patterns} not allowed in this context."
                    )
            missing: list[str] = []
            composition = self._managers.datasets.resolve_wildcard(
                datasets, components=components, missing=missing, explicit_only=(mode == "constrain")
            )
            if missing and mode == "constrain":
                # After v26 this should raise MissingDatasetTypeError, to be
                # implemented on DM-36303.
                warnings.warn(
                    f"Dataset type(s) {missing} are not registered; this will be an error after v26.",
                    FutureWarning,
                )
            doomed_by.extend(f"Dataset type {name} is not registered." for name in missing)
        elif collections:
            raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.")
        return composition, collections

    def queryDatasets(
        self,
        datasetType: Any,
        *,
        collections: Any = None,
        dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
        dataId: Optional[DataId] = None,
        where: str = "",
        findFirst: bool = False,
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DatasetQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        doomed_by: list[str] = []
        data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
        dataset_composition, collections = self._standardize_query_dataset_args(
            datasetType,
            collections,
            components,
            mode="find_first" if findFirst else "find_all",
            doomed_by=doomed_by,
        )
        parent_results: list[queries.ParentDatasetQueryResults] = []
        for parent_dataset_type, components_for_parent in dataset_composition.items():
            # The full set of dimensions in the query is the combination of
            # those needed for the DatasetType and those explicitly requested,
            # if any.
            dimension_names = set(parent_dataset_type.dimensions.names)
            if dimensions is not None:
                dimension_names.update(self.dimensions.extract(dimensions).names)
            # Construct the summary structure needed to construct a
            # QueryBuilder.
            summary = queries.QuerySummary(
                requested=DimensionGraph(self.dimensions, names=dimension_names),
                column_types=self._managers.column_types,
                data_id=data_id,
                expression=where,
                bind=bind,
                defaults=self.defaults.dataId,
                check=check,
                datasets=[parent_dataset_type],
            )
            builder = self._makeQueryBuilder(summary)
            # Add the dataset subquery to the query, telling the QueryBuilder
            # to include the rank of the selected collection in the results
            # only if we need to findFirst. Note that if any of the
            # collections are actually wildcard expressions, and
            # findFirst=True, this will raise TypeError for us.
            builder.joinDataset(parent_dataset_type, collections, isResult=True, findFirst=findFirst)
            query = builder.finish()
            parent_results.append(
                queries.ParentDatasetQueryResults(
                    query, parent_dataset_type, components=components_for_parent
                )
            )
        if not parent_results:
            doomed_by.extend(
                f"No registered dataset type matching {t!r} found, so no matching datasets can "
                "exist in any collection."
                for t in ensure_iterable(datasetType)
            )
            return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
        elif len(parent_results) == 1:
            return parent_results[0]
        else:
            return queries.ChainedDatasetQueryResults(parent_results)
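
    # Illustrative sketch (not part of the original source): querying datasets
    # with a ``where`` expression and bind values. The dataset type, collection,
    # and expression below are hypothetical.
    #
    #     refs = registry.queryDatasets(
    #         "calexp",
    #         collections="HSC/runs/RC2",
    #         where="visit = my_visit AND detector IN (10..20)",
    #         bind={"my_visit": 903334},
    #     )
    #     for ref in refs:
    #         print(ref.dataId)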

    def queryDataIds(
        self,
        dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str],
        *,
        dataId: Optional[DataId] = None,
        datasets: Any = None,
        collections: Any = None,
        where: str = "",
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DataCoordinateQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        dimensions = ensure_iterable(dimensions)
        requestedDimensions = self.dimensions.extract(dimensions)
        doomed_by: list[str] = []
        data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
        dataset_composition, collections = self._standardize_query_dataset_args(
            datasets, collections, components, doomed_by=doomed_by
        )
        summary = queries.QuerySummary(
            requested=requestedDimensions,
            column_types=self._managers.column_types,
            data_id=data_id,
            expression=where,
            bind=bind,
            defaults=self.defaults.dataId,
            check=check,
            datasets=dataset_composition.keys(),
        )
        builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
        for datasetType in dataset_composition.keys():
            builder.joinDataset(datasetType, collections, isResult=False)
        query = builder.finish()

        return queries.DataCoordinateQueryResults(query)
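
    # Illustrative sketch (not part of the original source): querying data IDs
    # constrained by dataset existence. The dataset type and collection name
    # are hypothetical.
    #
    #     data_ids = registry.queryDataIds(
    #         ["visit", "detector"],
    #         datasets="raw",
    #         collections="HSC/raw/all",
    #     )
    #     for data_id in data_ids:
    #         print(data_id["visit"], data_id["detector"])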

    def queryDimensionRecords(
        self,
        element: Union[DimensionElement, str],
        *,
        dataId: Optional[DataId] = None,
        datasets: Any = None,
        collections: Any = None,
        where: str = "",
        components: Optional[bool] = None,
        bind: Optional[Mapping[str, Any]] = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DimensionRecordQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if not isinstance(element, DimensionElement):
            try:
                element = self.dimensions[element]
            except KeyError as e:
                raise DimensionNameError(
                    f"No such dimension '{element}', available dimensions: "
                    + str(self.dimensions.getStaticElements())
                ) from e
        doomed_by: list[str] = []
        data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
        dataset_composition, collections = self._standardize_query_dataset_args(
            datasets, collections, components, doomed_by=doomed_by
        )
        summary = queries.QuerySummary(
            requested=element.graph,
            column_types=self._managers.column_types,
            data_id=data_id,
            expression=where,
            bind=bind,
            defaults=self.defaults.dataId,
            check=check,
            datasets=dataset_composition.keys(),
        )
        builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
        for datasetType in dataset_composition.keys():
            builder.joinDataset(datasetType, collections, isResult=False)
        query = builder.finish().with_record_columns(element)
        return queries.DatabaseDimensionRecordQueryResults(query, element)
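
    # Illustrative sketch (not part of the original source): fetching dimension
    # records for an element. The element name and ``where`` expression are
    # hypothetical.
    #
    #     records = registry.queryDimensionRecords(
    #         "exposure",
    #         where="instrument = 'HSC' AND exposure.observation_type = 'science'",
    #     )
    #     for record in records:
    #         print(record.id, record.timespan)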

    def queryDatasetAssociations(
        self,
        datasetType: Union[str, DatasetType],
        collections: Any = ...,
        *,
        collectionTypes: Iterable[CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
    ) -> Iterator[DatasetAssociation]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if collections is None:
            if not self.defaults.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to queryDatasetAssociations, "
                    "and no defaults from registry construction."
                )
            collections = self.defaults.collections
        collections = CollectionWildcard.from_expression(collections)
        backend = queries.SqlQueryBackend(self._db, self._managers)
        parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False)
        timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan")
        collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
        for parent_collection_record in backend.resolve_collection_wildcard(
            collections,
            collection_types=frozenset(collectionTypes),
            flatten_chains=flattenChains,
        ):
            # Resolve this possibly-chained collection into a list of
            # non-CHAINED collections that actually hold datasets of this
            # type.
            candidate_collection_records = backend.resolve_dataset_collections(
                parent_dataset_type,
                CollectionWildcard.from_names([parent_collection_record.name]),
                allow_calibration_collections=True,
                governor_constraints={},
            )
            if not candidate_collection_records:
                continue
            with backend.context() as context:
                relation = backend.make_dataset_query_relation(
                    parent_dataset_type,
                    candidate_collection_records,
                    columns={"dataset_id", "run", "timespan", "collection"},
                    context=context,
                )
                reader = queries.DatasetRefReader(
                    parent_dataset_type,
                    translate_collection=lambda k: self._managers.collections[k].name,
                    full=False,
                )
                for row in context.fetch_iterable(relation):
                    ref = reader.read(row)
                    collection_record = self._managers.collections[row[collection_tag]]
                    if collection_record.type is CollectionType.CALIBRATION:
                        timespan = row[timespan_tag]
                    else:
                        # For backwards compatibility and (possibly?) user
                        # convenience we continue to define the timespan of a
                        # DatasetAssociation row for a non-CALIBRATION
                        # collection to be None rather than a fully unbounded
                        # timespan.
                        timespan = None
                    yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan)
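
    # Illustrative sketch (not part of the original source): listing where a
    # calibration dataset type is associated, with validity ranges reported
    # for CALIBRATION collections. The names are hypothetical.
    #
    #     for assoc in registry.queryDatasetAssociations("bias", collections="HSC/calib"):
    #         print(assoc.collection, assoc.ref.dataId, assoc.timespan)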

    @property
    def obsCoreTableManager(self) -> ObsCoreTableManager | None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.obscore

    storageClasses: StorageClassFactory
    """All storage classes known to the registry (`StorageClassFactory`).
    """