Coverage for python/lsst/daf/butler/registries/sql.py: 16%
515 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-25 15:14 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-25 15:14 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("SqlRegistry",)
26import contextlib
27import logging
28import warnings
29from collections.abc import Iterable, Iterator, Mapping, Sequence
30from typing import TYPE_CHECKING, Any, Literal, cast
32import sqlalchemy
33from lsst.daf.relation import LeafRelation, Relation
34from lsst.resources import ResourcePathExpression
35from lsst.utils.introspection import find_outside_stacklevel
36from lsst.utils.iteration import ensure_iterable
38from ..core import (
39 Config,
40 DataCoordinate,
41 DataId,
42 DatasetAssociation,
43 DatasetColumnTag,
44 DatasetId,
45 DatasetIdGenEnum,
46 DatasetRef,
47 DatasetType,
48 Dimension,
49 DimensionConfig,
50 DimensionElement,
51 DimensionGraph,
52 DimensionRecord,
53 DimensionUniverse,
54 NamedKeyMapping,
55 NameLookupMapping,
56 Progress,
57 StorageClassFactory,
58 Timespan,
59 ddl,
60)
61from ..core.utils import transactional
62from ..registry import (
63 ArgumentError,
64 CollectionExpressionError,
65 CollectionSummary,
66 CollectionType,
67 CollectionTypeError,
68 ConflictingDefinitionError,
69 DataIdValueError,
70 DatasetTypeError,
71 DimensionNameError,
72 InconsistentDataIdError,
73 NoDefaultCollectionError,
74 OrphanedRecordError,
75 RegistryConfig,
76 RegistryConsistencyError,
77 RegistryDefaults,
78 _ButlerRegistry,
79 queries,
80)
81from ..registry.interfaces import ChainedCollectionRecord, RunRecord
82from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes
83from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard
85if TYPE_CHECKING:
86 from .._butlerConfig import ButlerConfig
87 from ..registry._registry import CollectionArgType
88 from ..registry.interfaces import (
89 CollectionRecord,
90 Database,
91 DatastoreRegistryBridgeManager,
92 ObsCoreTableManager,
93 )
96_LOG = logging.getLogger(__name__)
99class SqlRegistry(_ButlerRegistry):
100 """Registry implementation based on SQLAlchemy.
102 Parameters
103 ----------
104 database : `Database`
105 Database instance to store Registry.
106 defaults : `RegistryDefaults`
107 Default collection search path and/or output `~CollectionType.RUN`
108 collection.
109 managers : `RegistryManagerInstances`
110 All the managers required for this registry.
111 """
113 defaultConfigFile: str | None = None
114 """Path to configuration defaults. Accessed within the ``configs`` resource
115 or relative to a search path. Can be None if no defaults specified.
116 """
118 @classmethod
119 def createFromConfig(
120 cls,
121 config: RegistryConfig | str | None = None,
122 dimensionConfig: DimensionConfig | str | None = None,
123 butlerRoot: ResourcePathExpression | None = None,
124 ) -> _ButlerRegistry:
125 """Create registry database and return `SqlRegistry` instance.
127 This method initializes database contents, database must be empty
128 prior to calling this method.
130 Parameters
131 ----------
132 config : `RegistryConfig` or `str`, optional
133 Registry configuration, if missing then default configuration will
134 be loaded from registry.yaml.
135 dimensionConfig : `DimensionConfig` or `str`, optional
136 Dimensions configuration, if missing then default configuration
137 will be loaded from dimensions.yaml.
138 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
139 Path to the repository root this `SqlRegistry` will manage.
141 Returns
142 -------
143 registry : `SqlRegistry`
144 A new `SqlRegistry` instance.
145 """
146 config = cls.forceRegistryConfig(config)
147 config.replaceRoot(butlerRoot)
149 if isinstance(dimensionConfig, str):
150 dimensionConfig = DimensionConfig(dimensionConfig)
151 elif dimensionConfig is None:
152 dimensionConfig = DimensionConfig()
153 elif not isinstance(dimensionConfig, DimensionConfig):
154 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}")
156 DatabaseClass = config.getDatabaseClass()
157 database = DatabaseClass.fromUri(
158 config.connectionString, origin=config.get("origin", 0), namespace=config.get("namespace")
159 )
160 managerTypes = RegistryManagerTypes.fromConfig(config)
161 managers = managerTypes.makeRepo(database, dimensionConfig)
162 return cls(database, RegistryDefaults(), managers)
164 @classmethod
165 def fromConfig(
166 cls,
167 config: ButlerConfig | RegistryConfig | Config | str,
168 butlerRoot: ResourcePathExpression | None = None,
169 writeable: bool = True,
170 defaults: RegistryDefaults | None = None,
171 ) -> _ButlerRegistry:
172 """Create `Registry` subclass instance from `config`.
174 Registry database must be initialized prior to calling this method.
176 Parameters
177 ----------
178 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
179 Registry configuration
180 butlerRoot : `lsst.resources.ResourcePathExpression`, optional
181 Path to the repository root this `Registry` will manage.
182 writeable : `bool`, optional
183 If `True` (default) create a read-write connection to the database.
184 defaults : `RegistryDefaults`, optional
185 Default collection search path and/or output `~CollectionType.RUN`
186 collection.
188 Returns
189 -------
190 registry : `SqlRegistry` (subclass)
191 A new `SqlRegistry` subclass instance.
192 """
193 config = cls.forceRegistryConfig(config)
194 config.replaceRoot(butlerRoot)
195 DatabaseClass = config.getDatabaseClass()
196 database = DatabaseClass.fromUri(
197 config.connectionString,
198 origin=config.get("origin", 0),
199 namespace=config.get("namespace"),
200 writeable=writeable,
201 )
202 managerTypes = RegistryManagerTypes.fromConfig(config)
203 with database.session():
204 managers = managerTypes.loadRepo(database)
205 if defaults is None:
206 defaults = RegistryDefaults()
207 return cls(database, defaults, managers)
209 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances):
210 self._db = database
211 self._managers = managers
212 self.storageClasses = StorageClassFactory()
213 # Intentionally invoke property setter to initialize defaults. This
214 # can only be done after most of the rest of Registry has already been
215 # initialized, and must be done before the property getter is used.
216 self.defaults = defaults
218 def __str__(self) -> str:
219 return str(self._db)
221 def __repr__(self) -> str:
222 return f"SqlRegistry({self._db!r}, {self.dimensions!r})"
224 def isWriteable(self) -> bool:
225 # Docstring inherited from lsst.daf.butler.registry.Registry
226 return self._db.isWriteable()
228 def copy(self, defaults: RegistryDefaults | None = None) -> _ButlerRegistry:
229 # Docstring inherited from lsst.daf.butler.registry.Registry
230 if defaults is None:
231 # No need to copy, because `RegistryDefaults` is immutable; we
232 # effectively copy on write.
233 defaults = self.defaults
234 return type(self)(self._db, defaults, self._managers)
236 @property
237 def dimensions(self) -> DimensionUniverse:
238 # Docstring inherited from lsst.daf.butler.registry.Registry
239 return self._managers.dimensions.universe
241 def refresh(self) -> None:
242 # Docstring inherited from lsst.daf.butler.registry.Registry
243 with self._db.transaction():
244 self._managers.refresh()
246 @contextlib.contextmanager
247 def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
248 # Docstring inherited from lsst.daf.butler.registry.Registry
249 try:
250 with self._db.transaction(savepoint=savepoint):
251 yield
252 except BaseException:
253 # TODO: this clears the caches sometimes when we wouldn't actually
254 # need to. Can we avoid that?
255 self._managers.dimensions.clearCaches()
256 raise
258 def resetConnectionPool(self) -> None:
259 """Reset SQLAlchemy connection pool for `SqlRegistry` database.
261 This operation is useful when using registry with fork-based
262 multiprocessing. To use registry across fork boundary one has to make
263 sure that there are no currently active connections (no session or
264 transaction is in progress) and connection pool is reset using this
265 method. This method should be called by the child process immediately
266 after the fork.
267 """
268 self._db._engine.dispose()
270 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
271 """Add an opaque (to the `Registry`) table for use by a `Datastore` or
272 other data repository client.
274 Opaque table records can be added via `insertOpaqueData`, retrieved via
275 `fetchOpaqueData`, and removed via `deleteOpaqueData`.
277 Parameters
278 ----------
279 tableName : `str`
280 Logical name of the opaque table. This may differ from the
281 actual name used in the database by a prefix and/or suffix.
282 spec : `ddl.TableSpec`
283 Specification for the table to be added.
284 """
285 self._managers.opaque.register(tableName, spec)
287 @transactional
288 def insertOpaqueData(self, tableName: str, *data: dict) -> None:
289 """Insert records into an opaque table.
291 Parameters
292 ----------
293 tableName : `str`
294 Logical name of the opaque table. Must match the name used in a
295 previous call to `registerOpaqueTable`.
296 data
297 Each additional positional argument is a dictionary that represents
298 a single row to be added.
299 """
300 self._managers.opaque[tableName].insert(*data)
302 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]:
303 """Retrieve records from an opaque table.
305 Parameters
306 ----------
307 tableName : `str`
308 Logical name of the opaque table. Must match the name used in a
309 previous call to `registerOpaqueTable`.
310 where
311 Additional keyword arguments are interpreted as equality
312 constraints that restrict the returned rows (combined with AND);
313 keyword arguments are column names and values are the values they
314 must have.
316 Yields
317 ------
318 row : `dict`
319 A dictionary representing a single result row.
320 """
321 yield from self._managers.opaque[tableName].fetch(**where)
323 @transactional
324 def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
325 """Remove records from an opaque table.
327 Parameters
328 ----------
329 tableName : `str`
330 Logical name of the opaque table. Must match the name used in a
331 previous call to `registerOpaqueTable`.
332 where
333 Additional keyword arguments are interpreted as equality
334 constraints that restrict the deleted rows (combined with AND);
335 keyword arguments are column names and values are the values they
336 must have.
337 """
338 self._managers.opaque[tableName].delete(where.keys(), where)
340 def registerCollection(
341 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None
342 ) -> bool:
343 # Docstring inherited from lsst.daf.butler.registry.Registry
344 _, registered = self._managers.collections.register(name, type, doc=doc)
345 return registered
347 def getCollectionType(self, name: str) -> CollectionType:
348 # Docstring inherited from lsst.daf.butler.registry.Registry
349 return self._managers.collections.find(name).type
351 def _get_collection_record(self, name: str) -> CollectionRecord:
352 # Docstring inherited from lsst.daf.butler.registry.Registry
353 return self._managers.collections.find(name)
355 def registerRun(self, name: str, doc: str | None = None) -> bool:
356 # Docstring inherited from lsst.daf.butler.registry.Registry
357 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc)
358 return registered
360 @transactional
361 def removeCollection(self, name: str) -> None:
362 # Docstring inherited from lsst.daf.butler.registry.Registry
363 self._managers.collections.remove(name)
365 def getCollectionChain(self, parent: str) -> tuple[str, ...]:
366 # Docstring inherited from lsst.daf.butler.registry.Registry
367 record = self._managers.collections.find(parent)
368 if record.type is not CollectionType.CHAINED:
369 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
370 assert isinstance(record, ChainedCollectionRecord)
371 return record.children
373 @transactional
374 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
375 # Docstring inherited from lsst.daf.butler.registry.Registry
376 record = self._managers.collections.find(parent)
377 if record.type is not CollectionType.CHAINED:
378 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
379 assert isinstance(record, ChainedCollectionRecord)
380 children = CollectionWildcard.from_expression(children).require_ordered()
381 if children != record.children or flatten:
382 record.update(self._managers.collections, children, flatten=flatten)
384 def getCollectionParentChains(self, collection: str) -> set[str]:
385 # Docstring inherited from lsst.daf.butler.registry.Registry
386 return {
387 record.name
388 for record in self._managers.collections.getParentChains(
389 self._managers.collections.find(collection).key
390 )
391 }
393 def getCollectionDocumentation(self, collection: str) -> str | None:
394 # Docstring inherited from lsst.daf.butler.registry.Registry
395 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key)
397 def setCollectionDocumentation(self, collection: str, doc: str | None) -> None:
398 # Docstring inherited from lsst.daf.butler.registry.Registry
399 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc)
401 def getCollectionSummary(self, collection: str) -> CollectionSummary:
402 # Docstring inherited from lsst.daf.butler.registry.Registry
403 record = self._managers.collections.find(collection)
404 return self._managers.datasets.getCollectionSummary(record)
406 def registerDatasetType(self, datasetType: DatasetType) -> bool:
407 # Docstring inherited from lsst.daf.butler.registry.Registry
408 _, inserted = self._managers.datasets.register(datasetType)
409 return inserted
411 def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
412 # Docstring inherited from lsst.daf.butler.registry.Registry
414 for datasetTypeExpression in ensure_iterable(name):
415 # Catch any warnings from the caller specifying a component
416 # dataset type. This will result in an error later but the
417 # warning could be confusing when the caller is not querying
418 # anything.
419 with warnings.catch_warnings():
420 warnings.simplefilter("ignore", category=FutureWarning)
421 datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression))
422 if not datasetTypes:
423 _LOG.info("Dataset type %r not defined", datasetTypeExpression)
424 else:
425 for datasetType in datasetTypes:
426 self._managers.datasets.remove(datasetType.name)
427 _LOG.info("Removed dataset type %r", datasetType.name)
429 def getDatasetType(self, name: str) -> DatasetType:
430 # Docstring inherited from lsst.daf.butler.registry.Registry
431 parent_name, component = DatasetType.splitDatasetTypeName(name)
432 storage = self._managers.datasets[parent_name]
433 if component is None:
434 return storage.datasetType
435 else:
436 return storage.datasetType.makeComponentDatasetType(component)
438 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
439 # Docstring inherited from lsst.daf.butler.registry.Registry
440 return self._managers.datasets.supportsIdGenerationMode(mode)
442 def findDataset(
443 self,
444 datasetType: DatasetType | str,
445 dataId: DataId | None = None,
446 *,
447 collections: CollectionArgType | None = None,
448 timespan: Timespan | None = None,
449 **kwargs: Any,
450 ) -> DatasetRef | None:
451 # Docstring inherited from lsst.daf.butler.registry.Registry
452 if collections is None:
453 if not self.defaults.collections:
454 raise NoDefaultCollectionError(
455 "No collections provided to findDataset, and no defaults from registry construction."
456 )
457 collections = self.defaults.collections
458 backend = queries.SqlQueryBackend(self._db, self._managers)
459 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True)
460 if collection_wildcard.empty():
461 return None
462 matched_collections = backend.resolve_collection_wildcard(collection_wildcard)
463 parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard(
464 datasetType, components_deprecated=False
465 )
466 if len(components) > 1:
467 raise DatasetTypeError(
468 f"findDataset requires exactly one dataset type; got multiple components {components} "
469 f"for parent dataset type {parent_dataset_type.name}."
470 )
471 component = components[0]
472 dataId = DataCoordinate.standardize(
473 dataId,
474 graph=parent_dataset_type.dimensions,
475 universe=self.dimensions,
476 defaults=self.defaults.dataId,
477 **kwargs,
478 )
479 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names}
480 (filtered_collections,) = backend.filter_dataset_collections(
481 [parent_dataset_type],
482 matched_collections,
483 governor_constraints=governor_constraints,
484 ).values()
485 if not filtered_collections:
486 return None
487 if timespan is None:
488 filtered_collections = [
489 collection_record
490 for collection_record in filtered_collections
491 if collection_record.type is not CollectionType.CALIBRATION
492 ]
493 if filtered_collections:
494 requested_columns = {"dataset_id", "run", "collection"}
495 with backend.context() as context:
496 predicate = context.make_data_coordinate_predicate(
497 dataId.subset(parent_dataset_type.dimensions), full=False
498 )
499 if timespan is not None:
500 requested_columns.add("timespan")
501 predicate = predicate.logical_and(
502 context.make_timespan_overlap_predicate(
503 DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan
504 )
505 )
506 relation = backend.make_dataset_query_relation(
507 parent_dataset_type, filtered_collections, requested_columns, context
508 ).with_rows_satisfying(predicate)
509 rows = list(context.fetch_iterable(relation))
510 else:
511 rows = []
512 if not rows:
513 return None
514 elif len(rows) == 1:
515 best_row = rows[0]
516 else:
517 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)}
518 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
519 row_iter = iter(rows)
520 best_row = next(row_iter)
521 best_rank = rank_by_collection_key[best_row[collection_tag]]
522 have_tie = False
523 for row in row_iter:
524 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank:
525 best_row = row
526 best_rank = rank
527 have_tie = False
528 elif rank == best_rank:
529 have_tie = True
530 assert timespan is not None, "Rank ties should be impossible given DB constraints."
531 if have_tie:
532 raise LookupError(
533 f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections "
534 f"{collection_wildcard.strings} with timespan {timespan}."
535 )
536 reader = queries.DatasetRefReader(
537 parent_dataset_type,
538 translate_collection=lambda k: self._managers.collections[k].name,
539 )
540 ref = reader.read(best_row, data_id=dataId)
541 if component is not None:
542 ref = ref.makeComponentRef(component)
543 return ref
545 @transactional
546 def insertDatasets(
547 self,
548 datasetType: DatasetType | str,
549 dataIds: Iterable[DataId],
550 run: str | None = None,
551 expand: bool = True,
552 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
553 ) -> list[DatasetRef]:
554 # Docstring inherited from lsst.daf.butler.registry.Registry
555 if isinstance(datasetType, DatasetType):
556 storage = self._managers.datasets.find(datasetType.name)
557 if storage is None:
558 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
559 else:
560 storage = self._managers.datasets.find(datasetType)
561 if storage is None:
562 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.")
563 if run is None:
564 if self.defaults.run is None:
565 raise NoDefaultCollectionError(
566 "No run provided to insertDatasets, and no default from registry construction."
567 )
568 run = self.defaults.run
569 runRecord = self._managers.collections.find(run)
570 if runRecord.type is not CollectionType.RUN:
571 raise CollectionTypeError(
572 f"Given collection is of type {runRecord.type.name}; RUN collection required."
573 )
574 assert isinstance(runRecord, RunRecord)
575 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
576 if expand:
577 expandedDataIds = [
578 self.expandDataId(dataId, graph=storage.datasetType.dimensions)
579 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs")
580 ]
581 else:
582 expandedDataIds = [
583 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds
584 ]
585 try:
586 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode))
587 if self._managers.obscore:
588 context = queries.SqlQueryContext(self._db, self._managers.column_types)
589 self._managers.obscore.add_datasets(refs, context)
590 except sqlalchemy.exc.IntegrityError as err:
591 raise ConflictingDefinitionError(
592 "A database constraint failure was triggered by inserting "
593 f"one or more datasets of type {storage.datasetType} into "
594 f"collection '{run}'. "
595 "This probably means a dataset with the same data ID "
596 "and dataset type already exists, but it may also mean a "
597 "dimension row is missing."
598 ) from err
599 return refs
601 @transactional
602 def _importDatasets(
603 self,
604 datasets: Iterable[DatasetRef],
605 expand: bool = True,
606 ) -> list[DatasetRef]:
607 # Docstring inherited from lsst.daf.butler.registry.Registry
608 datasets = list(datasets)
609 if not datasets:
610 # nothing to do
611 return []
613 # find dataset type
614 datasetTypes = {dataset.datasetType for dataset in datasets}
615 if len(datasetTypes) != 1:
616 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}")
617 datasetType = datasetTypes.pop()
619 # get storage handler for this dataset type
620 storage = self._managers.datasets.find(datasetType.name)
621 if storage is None:
622 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
624 # find run name
625 runs = {dataset.run for dataset in datasets}
626 if len(runs) != 1:
627 raise ValueError(f"Multiple run names in input datasets: {runs}")
628 run = runs.pop()
630 runRecord = self._managers.collections.find(run)
631 if runRecord.type is not CollectionType.RUN:
632 raise CollectionTypeError(
633 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};"
634 " RUN collection required."
635 )
636 assert isinstance(runRecord, RunRecord)
638 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
639 if expand:
640 expandedDatasets = [
641 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions))
642 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs")
643 ]
644 else:
645 expandedDatasets = [
646 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True)
647 for dataset in datasets
648 ]
650 try:
651 refs = list(storage.import_(runRecord, expandedDatasets))
652 if self._managers.obscore:
653 context = queries.SqlQueryContext(self._db, self._managers.column_types)
654 self._managers.obscore.add_datasets(refs, context)
655 except sqlalchemy.exc.IntegrityError as err:
656 raise ConflictingDefinitionError(
657 "A database constraint failure was triggered by inserting "
658 f"one or more datasets of type {storage.datasetType} into "
659 f"collection '{run}'. "
660 "This probably means a dataset with the same data ID "
661 "and dataset type already exists, but it may also mean a "
662 "dimension row is missing."
663 ) from err
664 # Check that imported dataset IDs match the input
665 for imported_ref, input_ref in zip(refs, datasets, strict=True):
666 if imported_ref.id != input_ref.id:
667 raise RegistryConsistencyError(
668 "Imported dataset ID differs from input dataset ID, "
669 f"input ref: {input_ref}, imported ref: {imported_ref}"
670 )
671 return refs
673 def getDataset(self, id: DatasetId) -> DatasetRef | None:
674 # Docstring inherited from lsst.daf.butler.registry.Registry
675 return self._managers.datasets.getDatasetRef(id)
677 @transactional
678 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
679 # Docstring inherited from lsst.daf.butler.registry.Registry
680 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG)
681 for datasetType, refsForType in progress.iter_item_chunks(
682 DatasetRef.iter_by_type(refs), desc="Removing datasets by type"
683 ):
684 storage = self._managers.datasets[datasetType.name]
685 try:
686 storage.delete(refsForType)
687 except sqlalchemy.exc.IntegrityError as err:
688 raise OrphanedRecordError(
689 "One or more datasets is still present in one or more Datastores."
690 ) from err
692 @transactional
693 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
694 # Docstring inherited from lsst.daf.butler.registry.Registry
695 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG)
696 collectionRecord = self._managers.collections.find(collection)
697 if collectionRecord.type is not CollectionType.TAGGED:
698 raise CollectionTypeError(
699 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED."
700 )
701 for datasetType, refsForType in progress.iter_item_chunks(
702 DatasetRef.iter_by_type(refs), desc="Associating datasets by type"
703 ):
704 storage = self._managers.datasets[datasetType.name]
705 try:
706 storage.associate(collectionRecord, refsForType)
707 if self._managers.obscore:
708 # If a TAGGED collection is being monitored by ObsCore
709 # manager then we may need to save the dataset.
710 context = queries.SqlQueryContext(self._db, self._managers.column_types)
711 self._managers.obscore.associate(refsForType, collectionRecord, context)
712 except sqlalchemy.exc.IntegrityError as err:
713 raise ConflictingDefinitionError(
714 f"Constraint violation while associating dataset of type {datasetType.name} with "
715 f"collection {collection}. This probably means that one or more datasets with the same "
716 "dataset type and data ID already exist in the collection, but it may also indicate "
717 "that the datasets do not exist."
718 ) from err
720 @transactional
721 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
722 # Docstring inherited from lsst.daf.butler.registry.Registry
723 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG)
724 collectionRecord = self._managers.collections.find(collection)
725 if collectionRecord.type is not CollectionType.TAGGED:
726 raise CollectionTypeError(
727 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED."
728 )
729 for datasetType, refsForType in progress.iter_item_chunks(
730 DatasetRef.iter_by_type(refs), desc="Disassociating datasets by type"
731 ):
732 storage = self._managers.datasets[datasetType.name]
733 storage.disassociate(collectionRecord, refsForType)
734 if self._managers.obscore:
735 self._managers.obscore.disassociate(refsForType, collectionRecord)
737 @transactional
738 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
739 # Docstring inherited from lsst.daf.butler.registry.Registry
740 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG)
741 collectionRecord = self._managers.collections.find(collection)
742 for datasetType, refsForType in progress.iter_item_chunks(
743 DatasetRef.iter_by_type(refs), desc="Certifying datasets by type"
744 ):
745 storage = self._managers.datasets[datasetType.name]
746 storage.certify(
747 collectionRecord,
748 refsForType,
749 timespan,
750 context=queries.SqlQueryContext(self._db, self._managers.column_types),
751 )
753 @transactional
754 def decertify(
755 self,
756 collection: str,
757 datasetType: str | DatasetType,
758 timespan: Timespan,
759 *,
760 dataIds: Iterable[DataId] | None = None,
761 ) -> None:
762 # Docstring inherited from lsst.daf.butler.registry.Registry
763 collectionRecord = self._managers.collections.find(collection)
764 if isinstance(datasetType, str):
765 storage = self._managers.datasets[datasetType]
766 else:
767 storage = self._managers.datasets[datasetType.name]
768 standardizedDataIds = None
769 if dataIds is not None:
770 standardizedDataIds = [
771 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds
772 ]
773 storage.decertify(
774 collectionRecord,
775 timespan,
776 dataIds=standardizedDataIds,
777 context=queries.SqlQueryContext(self._db, self._managers.column_types),
778 )
780 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
781 """Return an object that allows a new `Datastore` instance to
782 communicate with this `Registry`.
784 Returns
785 -------
786 manager : `DatastoreRegistryBridgeManager`
787 Object that mediates communication between this `Registry` and its
788 associated datastores.
789 """
790 return self._managers.datastores
792 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
793 # Docstring inherited from lsst.daf.butler.registry.Registry
794 return self._managers.datastores.findDatastores(ref)
796 def expandDataId(
797 self,
798 dataId: DataId | None = None,
799 *,
800 graph: DimensionGraph | None = None,
801 records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None,
802 withDefaults: bool = True,
803 **kwargs: Any,
804 ) -> DataCoordinate:
805 # Docstring inherited from lsst.daf.butler.registry.Registry
806 if not withDefaults:
807 defaults = None
808 else:
809 defaults = self.defaults.dataId
810 try:
811 standardized = DataCoordinate.standardize(
812 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs
813 )
814 except KeyError as exc:
815 # This means either kwargs have some odd name or required
816 # dimension is missing.
817 raise DimensionNameError(str(exc)) from exc
818 if standardized.hasRecords():
819 return standardized
820 if records is None:
821 records = {}
822 elif isinstance(records, NamedKeyMapping):
823 records = records.byName()
824 else:
825 records = dict(records)
826 if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
827 records.update(dataId.records.byName())
828 keys = standardized.byName()
829 context = queries.SqlQueryContext(self._db, self._managers.column_types)
830 for element in standardized.graph.primaryKeyTraversalOrder:
831 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL
832 if record is ...:
833 if isinstance(element, Dimension) and keys.get(element.name) is None:
834 if element in standardized.graph.required:
835 raise DimensionNameError(
836 f"No value or null value for required dimension {element.name}."
837 )
838 keys[element.name] = None
839 record = None
840 else:
841 storage = self._managers.dimensions[element]
842 record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context)
843 records[element.name] = record
844 if record is not None:
845 for d in element.implied:
846 value = getattr(record, d.name)
847 if keys.setdefault(d.name, value) != value:
848 raise InconsistentDataIdError(
849 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
850 f"but {element.name} implies {d.name}={value!r}."
851 )
852 else:
853 if element in standardized.graph.required:
854 raise DataIdValueError(
855 f"Could not fetch record for required dimension {element.name} via keys {keys}."
856 )
857 if element.alwaysJoin:
858 raise InconsistentDataIdError(
859 f"Could not fetch record for element {element.name} via keys {keys}, ",
860 "but it is marked alwaysJoin=True; this means one or more dimensions are not "
861 "related.",
862 )
863 for d in element.implied:
864 keys.setdefault(d.name, None)
865 records.setdefault(d.name, None)
866 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records)
868 def insertDimensionData(
869 self,
870 element: DimensionElement | str,
871 *data: Mapping[str, Any] | DimensionRecord,
872 conform: bool = True,
873 replace: bool = False,
874 skip_existing: bool = False,
875 ) -> None:
876 # Docstring inherited from lsst.daf.butler.registry.Registry
877 if conform:
878 if isinstance(element, str):
879 element = self.dimensions[element]
880 records = [
881 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data
882 ]
883 else:
884 # Ignore typing since caller said to trust them with conform=False.
885 records = data # type: ignore
886 storage = self._managers.dimensions[element]
887 storage.insert(*records, replace=replace, skip_existing=skip_existing)
889 def syncDimensionData(
890 self,
891 element: DimensionElement | str,
892 row: Mapping[str, Any] | DimensionRecord,
893 conform: bool = True,
894 update: bool = False,
895 ) -> bool | dict[str, Any]:
896 # Docstring inherited from lsst.daf.butler.registry.Registry
897 if conform:
898 if isinstance(element, str):
899 element = self.dimensions[element]
900 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row)
901 else:
902 # Ignore typing since caller said to trust them with conform=False.
903 record = row # type: ignore
904 storage = self._managers.dimensions[element]
905 return storage.sync(record, update=update)
907 def queryDatasetTypes(
908 self,
909 expression: Any = ...,
910 *,
911 components: bool | None = False,
912 missing: list[str] | None = None,
913 ) -> Iterable[DatasetType]:
914 # Docstring inherited from lsst.daf.butler.registry.Registry
915 wildcard = DatasetTypeWildcard.from_expression(expression)
916 composition_dict = self._managers.datasets.resolve_wildcard(
917 wildcard,
918 components=components,
919 missing=missing,
920 )
921 result: list[DatasetType] = []
922 for parent_dataset_type, components_for_parent in composition_dict.items():
923 result.extend(
924 parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type
925 for c in components_for_parent
926 )
927 return result
929 def queryCollections(
930 self,
931 expression: Any = ...,
932 datasetType: DatasetType | None = None,
933 collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(),
934 flattenChains: bool = False,
935 includeChains: bool | None = None,
936 ) -> Sequence[str]:
937 # Docstring inherited from lsst.daf.butler.registry.Registry
939 # Right now the datasetTypes argument is completely ignored, but that
940 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up
941 # ticket will take care of that.
942 try:
943 wildcard = CollectionWildcard.from_expression(expression)
944 except TypeError as exc:
945 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc
946 collectionTypes = ensure_iterable(collectionTypes)
947 return [
948 record.name
949 for record in self._managers.collections.resolve_wildcard(
950 wildcard,
951 collection_types=frozenset(collectionTypes),
952 flatten_chains=flattenChains,
953 include_chains=includeChains,
954 )
955 ]
957 def _makeQueryBuilder(
958 self,
959 summary: queries.QuerySummary,
960 doomed_by: Iterable[str] = (),
961 ) -> queries.QueryBuilder:
962 """Return a `QueryBuilder` instance capable of constructing and
963 managing more complex queries than those obtainable via `Registry`
964 interfaces.
966 This is an advanced interface; downstream code should prefer
967 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
968 are sufficient.
970 Parameters
971 ----------
972 summary : `queries.QuerySummary`
973 Object describing and categorizing the full set of dimensions that
974 will be included in the query.
975 doomed_by : `~collections.abc.Iterable` of `str`, optional
976 A list of diagnostic messages that indicate why the query is going
977 to yield no results and should not even be executed. If an empty
978 container (default) the query will be executed unless other code
979 determines that it is doomed.
981 Returns
982 -------
983 builder : `queries.QueryBuilder`
984 Object that can be used to construct and perform advanced queries.
985 """
986 doomed_by = list(doomed_by)
987 backend = queries.SqlQueryBackend(self._db, self._managers)
988 context = backend.context()
989 relation: Relation | None = None
990 if doomed_by:
991 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by)
992 return queries.QueryBuilder(
993 summary,
994 backend=backend,
995 context=context,
996 relation=relation,
997 )
999 def _standardize_query_data_id_args(
1000 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any
1001 ) -> DataCoordinate:
1002 """Preprocess the data ID arguments passed to query* methods.
1004 Parameters
1005 ----------
1006 data_id : `DataId` or `None`
1007 Data ID that constrains the query results.
1008 doomed_by : `list` [ `str` ]
1009 List to append messages indicating why the query is doomed to
1010 yield no results.
1011 **kwargs
1012 Additional data ID key-value pairs, extending and overriding
1013 ``data_id``.
1015 Returns
1016 -------
1017 data_id : `DataCoordinate`
1018 Standardized data ID. Will be fully expanded unless expansion
1019 fails, in which case a message will be appended to ``doomed_by``
1020 on return.
1021 """
1022 try:
1023 return self.expandDataId(data_id, **kwargs)
1024 except DataIdValueError as err:
1025 doomed_by.append(str(err))
1026 return DataCoordinate.standardize(
1027 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId
1028 )
1030 def _standardize_query_dataset_args(
1031 self,
1032 datasets: Any,
1033 collections: CollectionArgType | None,
1034 components: bool | None,
1035 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain",
1036 *,
1037 doomed_by: list[str],
1038 ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]:
1039 """Preprocess dataset arguments passed to query* methods.
1041 Parameters
1042 ----------
1043 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these
1044 Expression identifying dataset types. See `queryDatasetTypes` for
1045 details.
1046 collections : `str`, `re.Pattern`, or iterable of these
1047 Expression identifying collections to be searched. See
1048 `queryCollections` for details.
1049 components : `bool`, optional
1050 If `True`, apply all expression patterns to component dataset type
1051 names as well. If `False`, never apply patterns to components.
1052 If `None` (default), apply patterns to components only if their
1053 parent datasets were not matched by the expression.
1054 Fully-specified component datasets (`str` or `DatasetType`
1055 instances) are always included.
1057 Values other than `False` are deprecated, and only `False` will be
1058 supported after v26. After v27 this argument will be removed
1059 entirely.
1060 mode : `str`, optional
1061 The way in which datasets are being used in this query; one of:
1063 - "find_first": this is a query for the first dataset in an
1064 ordered list of collections. Prohibits collection wildcards,
1065 but permits dataset type wildcards.
1067 - "find_all": this is a query for all datasets in all matched
1068 collections. Permits collection and dataset type wildcards.
1070 - "constrain": this is a query for something other than datasets,
1071 with results constrained by dataset existence. Permits
1072 collection wildcards and prohibits ``...`` as a dataset type
1073 wildcard.
1074 doomed_by : `list` [ `str` ]
1075 List to append messages indicating why the query is doomed to
1076 yield no results.
1078 Returns
1079 -------
1080 composition : `defaultdict` [ `DatasetType`, `list` [ `str` ] ]
1081 Dictionary mapping parent dataset type to `list` of components
1082 matched for that dataset type (or `None` for the parent itself).
1083 collections : `CollectionWildcard`
1084 Processed collection expression.
1085 """
1086 composition: dict[DatasetType, list[str | None]] = {}
1087 collection_wildcard: CollectionWildcard | None = None
1088 if datasets is not None:
1089 if collections is None:
1090 if not self.defaults.collections:
1091 raise NoDefaultCollectionError("No collections, and no registry default collections.")
1092 collection_wildcard = CollectionWildcard.from_expression(self.defaults.collections)
1093 else:
1094 collection_wildcard = CollectionWildcard.from_expression(collections)
1095 if mode == "find_first" and collection_wildcard.patterns:
1096 raise TypeError(
1097 f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context."
1098 )
1099 missing: list[str] = []
1100 composition = self._managers.datasets.resolve_wildcard(
1101 datasets, components=components, missing=missing, explicit_only=(mode == "constrain")
1102 )
1103 if missing and mode == "constrain":
1104 # After v26 this should raise MissingDatasetTypeError, to be
1105 # implemented on DM-36303.
1106 warnings.warn(
1107 f"Dataset type(s) {missing} are not registered; this will be an error after v26.",
1108 FutureWarning,
1109 stacklevel=find_outside_stacklevel("lsst.daf.butler"),
1110 )
1111 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing)
1112 elif collections:
1113 # I think this check should actually be `collections is not None`,
1114 # but it looks like some CLI scripts use empty tuple as default.
1115 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.")
1116 return composition, collection_wildcard
1118 def queryDatasets(
1119 self,
1120 datasetType: Any,
1121 *,
1122 collections: CollectionArgType | None = None,
1123 dimensions: Iterable[Dimension | str] | None = None,
1124 dataId: DataId | None = None,
1125 where: str = "",
1126 findFirst: bool = False,
1127 components: bool | None = False,
1128 bind: Mapping[str, Any] | None = None,
1129 check: bool = True,
1130 **kwargs: Any,
1131 ) -> queries.DatasetQueryResults:
1132 # Docstring inherited from lsst.daf.butler.registry.Registry
1133 doomed_by: list[str] = []
1134 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1135 dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
1136 datasetType,
1137 collections,
1138 components,
1139 mode="find_first" if findFirst else "find_all",
1140 doomed_by=doomed_by,
1141 )
1142 if collection_wildcard is not None and collection_wildcard.empty():
1143 doomed_by.append("No datasets can be found because collection list is empty.")
1144 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
1145 parent_results: list[queries.ParentDatasetQueryResults] = []
1146 for parent_dataset_type, components_for_parent in dataset_composition.items():
1147 # The full set of dimensions in the query is the combination of
1148 # those needed for the DatasetType and those explicitly requested,
1149 # if any.
1150 dimension_names = set(parent_dataset_type.dimensions.names)
1151 if dimensions is not None:
1152 dimension_names.update(self.dimensions.extract(dimensions).names)
1153 # Construct the summary structure needed to construct a
1154 # QueryBuilder.
1155 summary = queries.QuerySummary(
1156 requested=DimensionGraph(self.dimensions, names=dimension_names),
1157 column_types=self._managers.column_types,
1158 data_id=data_id,
1159 expression=where,
1160 bind=bind,
1161 defaults=self.defaults.dataId,
1162 check=check,
1163 datasets=[parent_dataset_type],
1164 )
1165 builder = self._makeQueryBuilder(summary)
1166 # Add the dataset subquery to the query, telling the QueryBuilder
1167 # to include the rank of the selected collection in the results
1168 # only if we need to findFirst. Note that if any of the
1169 # collections are actually wildcard expressions, and
1170 # findFirst=True, this will raise TypeError for us.
1171 builder.joinDataset(parent_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst)
1172 query = builder.finish()
1173 parent_results.append(
1174 queries.ParentDatasetQueryResults(
1175 query, parent_dataset_type, components=components_for_parent
1176 )
1177 )
1178 if not parent_results:
1179 doomed_by.extend(
1180 f"No registered dataset type matching {t!r} found, so no matching datasets can "
1181 "exist in any collection."
1182 for t in ensure_iterable(datasetType)
1183 )
1184 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
1185 elif len(parent_results) == 1:
1186 return parent_results[0]
1187 else:
1188 return queries.ChainedDatasetQueryResults(parent_results)
1190 def queryDataIds(
1191 self,
1192 dimensions: Iterable[Dimension | str] | Dimension | str,
1193 *,
1194 dataId: DataId | None = None,
1195 datasets: Any = None,
1196 collections: CollectionArgType | None = None,
1197 where: str = "",
1198 components: bool | None = None,
1199 bind: Mapping[str, Any] | None = None,
1200 check: bool = True,
1201 **kwargs: Any,
1202 ) -> queries.DataCoordinateQueryResults:
1203 # Docstring inherited from lsst.daf.butler.registry.Registry
1204 dimensions = ensure_iterable(dimensions)
1205 requestedDimensions = self.dimensions.extract(dimensions)
1206 doomed_by: list[str] = []
1207 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1208 dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
1209 datasets, collections, components, doomed_by=doomed_by
1210 )
1211 if collection_wildcard is not None and collection_wildcard.empty():
1212 doomed_by.append("No data coordinates can be found because collection list is empty.")
1213 summary = queries.QuerySummary(
1214 requested=requestedDimensions,
1215 column_types=self._managers.column_types,
1216 data_id=data_id,
1217 expression=where,
1218 bind=bind,
1219 defaults=self.defaults.dataId,
1220 check=check,
1221 datasets=dataset_composition.keys(),
1222 )
1223 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
1224 for datasetType in dataset_composition:
1225 builder.joinDataset(datasetType, collection_wildcard, isResult=False)
1226 query = builder.finish()
1228 return queries.DataCoordinateQueryResults(query)
1230 def queryDimensionRecords(
1231 self,
1232 element: DimensionElement | str,
1233 *,
1234 dataId: DataId | None = None,
1235 datasets: Any = None,
1236 collections: CollectionArgType | None = None,
1237 where: str = "",
1238 components: bool | None = None,
1239 bind: Mapping[str, Any] | None = None,
1240 check: bool = True,
1241 **kwargs: Any,
1242 ) -> queries.DimensionRecordQueryResults:
1243 # Docstring inherited from lsst.daf.butler.registry.Registry
1244 if not isinstance(element, DimensionElement):
1245 try:
1246 element = self.dimensions[element]
1247 except KeyError as e:
1248 raise DimensionNameError(
1249 f"No such dimension '{element}', available dimensions: "
1250 + str(self.dimensions.getStaticElements())
1251 ) from e
1252 doomed_by: list[str] = []
1253 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1254 dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
1255 datasets, collections, components, doomed_by=doomed_by
1256 )
1257 if collection_wildcard is not None and collection_wildcard.empty():
1258 doomed_by.append("No dimension records can be found because collection list is empty.")
1259 summary = queries.QuerySummary(
1260 requested=element.graph,
1261 column_types=self._managers.column_types,
1262 data_id=data_id,
1263 expression=where,
1264 bind=bind,
1265 defaults=self.defaults.dataId,
1266 check=check,
1267 datasets=dataset_composition.keys(),
1268 )
1269 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
1270 for datasetType in dataset_composition:
1271 builder.joinDataset(datasetType, collection_wildcard, isResult=False)
1272 query = builder.finish().with_record_columns(element)
1273 return queries.DatabaseDimensionRecordQueryResults(query, element)
1275 def queryDatasetAssociations(
1276 self,
1277 datasetType: str | DatasetType,
1278 collections: CollectionArgType | None = ...,
1279 *,
1280 collectionTypes: Iterable[CollectionType] = CollectionType.all(),
1281 flattenChains: bool = False,
1282 ) -> Iterator[DatasetAssociation]:
1283 # Docstring inherited from lsst.daf.butler.registry.Registry
1284 if collections is None:
1285 if not self.defaults.collections:
1286 raise NoDefaultCollectionError(
1287 "No collections provided to queryDatasetAssociations, "
1288 "and no defaults from registry construction."
1289 )
1290 collections = self.defaults.collections
1291 collection_wildcard = CollectionWildcard.from_expression(collections)
1292 backend = queries.SqlQueryBackend(self._db, self._managers)
1293 parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False)
1294 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan")
1295 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
1296 for parent_collection_record in backend.resolve_collection_wildcard(
1297 collection_wildcard,
1298 collection_types=frozenset(collectionTypes),
1299 flatten_chains=flattenChains,
1300 ):
1301 # Resolve this possibly-chained collection into a list of
1302 # non-CHAINED collections that actually hold datasets of this
1303 # type.
1304 candidate_collection_records = backend.resolve_dataset_collections(
1305 parent_dataset_type,
1306 CollectionWildcard.from_names([parent_collection_record.name]),
1307 allow_calibration_collections=True,
1308 governor_constraints={},
1309 )
1310 if not candidate_collection_records:
1311 continue
1312 with backend.context() as context:
1313 relation = backend.make_dataset_query_relation(
1314 parent_dataset_type,
1315 candidate_collection_records,
1316 columns={"dataset_id", "run", "timespan", "collection"},
1317 context=context,
1318 )
1319 reader = queries.DatasetRefReader(
1320 parent_dataset_type,
1321 translate_collection=lambda k: self._managers.collections[k].name,
1322 full=False,
1323 )
1324 for row in context.fetch_iterable(relation):
1325 ref = reader.read(row)
1326 collection_record = self._managers.collections[row[collection_tag]]
1327 if collection_record.type is CollectionType.CALIBRATION:
1328 timespan = row[timespan_tag]
1329 else:
1330 # For backwards compatibility and (possibly?) user
1331 # convenience we continue to define the timespan of a
1332 # DatasetAssociation row for a non-CALIBRATION
1333 # collection to be None rather than a fully unbounded
1334 # timespan.
1335 timespan = None
1336 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan)
1338 @property
1339 def obsCoreTableManager(self) -> ObsCoreTableManager | None:
1340 # Docstring inherited from lsst.daf.butler.registry.Registry
1341 return self._managers.obscore
1343 storageClasses: StorageClassFactory
1344 """All storage classes known to the registry (`StorageClassFactory`).
1345 """