Coverage for python/lsst/daf/butler/registries/sql.py: 12%
516 statements
coverage.py v7.2.7, created at 2023-07-12 10:56 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("SqlRegistry",)
26import contextlib
27import logging
28import warnings
29from collections.abc import Iterable, Iterator, Mapping, Sequence
30from typing import TYPE_CHECKING, Any, Literal, cast
32import sqlalchemy
33from lsst.daf.relation import LeafRelation, Relation
34from lsst.resources import ResourcePathExpression
35from lsst.utils.introspection import find_outside_stacklevel
36from lsst.utils.iteration import ensure_iterable
38from ..core import (
39 Config,
40 DataCoordinate,
41 DataId,
42 DatasetAssociation,
43 DatasetColumnTag,
44 DatasetId,
45 DatasetIdFactory,
46 DatasetIdGenEnum,
47 DatasetRef,
48 DatasetType,
49 Dimension,
50 DimensionConfig,
51 DimensionElement,
52 DimensionGraph,
53 DimensionRecord,
54 DimensionUniverse,
55 NamedKeyMapping,
56 NameLookupMapping,
57 Progress,
58 StorageClassFactory,
59 Timespan,
60 ddl,
61)
62from ..core.utils import transactional
63from ..registry import (
64 ArgumentError,
65 CollectionExpressionError,
66 CollectionSummary,
67 CollectionType,
68 CollectionTypeError,
69 ConflictingDefinitionError,
70 DataIdValueError,
71 DatasetTypeError,
72 DimensionNameError,
73 InconsistentDataIdError,
74 NoDefaultCollectionError,
75 OrphanedRecordError,
76 Registry,
77 RegistryConfig,
78 RegistryConsistencyError,
79 RegistryDefaults,
80 queries,
81)
82from ..registry.interfaces import ChainedCollectionRecord, RunRecord
83from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes
84from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard
86if TYPE_CHECKING:
87 from .._butlerConfig import ButlerConfig
88 from ..registry._registry import CollectionArgType
89 from ..registry.interfaces import (
90 CollectionRecord,
91 Database,
92 DatastoreRegistryBridgeManager,
93 ObsCoreTableManager,
94 )
97_LOG = logging.getLogger(__name__)
100class SqlRegistry(Registry):
101 """Registry implementation based on SQLAlchemy.
103 Parameters
104 ----------
105 database : `Database`
106 Database instance used to store the `Registry` contents.
107 defaults : `RegistryDefaults`
108 Default collection search path and/or output `~CollectionType.RUN`
109 collection.
110 managers : `RegistryManagerInstances`
111 All the managers required for this registry.
112 """
114 defaultConfigFile: str | None = None
115 """Path to configuration defaults. Accessed within the ``configs`` resource
116 or relative to a search path. Can be `None` if no defaults are specified.
117 """
119 @classmethod
120 def createFromConfig(
121 cls,
122 config: RegistryConfig | str | None = None,
123 dimensionConfig: DimensionConfig | str | None = None,
124 butlerRoot: ResourcePathExpression | None = None,
125 ) -> Registry:
126 """Create registry database and return `SqlRegistry` instance.
128 This method initializes database contents; the database must be empty
129 prior to calling this method.
131 Parameters
132 ----------
133 config : `RegistryConfig` or `str`, optional
134 Registry configuration. If missing, the default configuration will
135 be loaded from ``registry.yaml``.
136 dimensionConfig : `DimensionConfig` or `str`, optional
137 Dimensions configuration. If missing, the default configuration
138 will be loaded from ``dimensions.yaml``.
139 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
140 Path to the repository root this `SqlRegistry` will manage.
142 Returns
143 -------
144 registry : `SqlRegistry`
145 A new `SqlRegistry` instance.
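        Examples
        --------
        A minimal sketch; ``registry.yaml`` is a placeholder for a real
        registry configuration file:

        >>> registry = SqlRegistry.createFromConfig("registry.yaml")  # doctest: +SKIP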
146 """
147 config = cls.forceRegistryConfig(config)
148 config.replaceRoot(butlerRoot)
150 if isinstance(dimensionConfig, str):
151 dimensionConfig = DimensionConfig(dimensionConfig)
152 elif dimensionConfig is None:
153 dimensionConfig = DimensionConfig()
154 elif not isinstance(dimensionConfig, DimensionConfig):
155 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}")
157 DatabaseClass = config.getDatabaseClass()
158 database = DatabaseClass.fromUri(
159 config.connectionString, origin=config.get("origin", 0), namespace=config.get("namespace")
160 )
161 managerTypes = RegistryManagerTypes.fromConfig(config)
162 managers = managerTypes.makeRepo(database, dimensionConfig)
163 return cls(database, RegistryDefaults(), managers)
165 @classmethod
166 def fromConfig(
167 cls,
168 config: ButlerConfig | RegistryConfig | Config | str,
169 butlerRoot: ResourcePathExpression | None = None,
170 writeable: bool = True,
171 defaults: RegistryDefaults | None = None,
172 ) -> Registry:
173 """Create `Registry` subclass instance from `config`.
175 Registry database must be initialized prior to calling this method.
177 Parameters
178 ----------
179 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
180 Registry configuration.
181 butlerRoot : `lsst.resources.ResourcePathExpression`, optional
182 Path to the repository root this `Registry` will manage.
183 writeable : `bool`, optional
184 If `True` (default) create a read-write connection to the database.
185 defaults : `RegistryDefaults`, optional
186 Default collection search path and/or output `~CollectionType.RUN`
187 collection.
189 Returns
190 -------
191 registry : `SqlRegistry` (subclass)
192 A new `SqlRegistry` subclass instance.
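        Examples
        --------
        A minimal sketch of connecting to an existing repository; the
        ``butler.yaml`` path is a placeholder:

        >>> registry = SqlRegistry.fromConfig("butler.yaml", writeable=False)  # doctest: +SKIP
        >>> registry.isWriteable()  # doctest: +SKIP
        False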
193 """
194 config = cls.forceRegistryConfig(config)
195 config.replaceRoot(butlerRoot)
196 DatabaseClass = config.getDatabaseClass()
197 database = DatabaseClass.fromUri(
198 config.connectionString,
199 origin=config.get("origin", 0),
200 namespace=config.get("namespace"),
201 writeable=writeable,
202 )
203 managerTypes = RegistryManagerTypes.fromConfig(config)
204 with database.session():
205 managers = managerTypes.loadRepo(database)
206 if defaults is None:
207 defaults = RegistryDefaults()
208 return cls(database, defaults, managers)
210 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances):
211 self._db = database
212 self._managers = managers
213 self.storageClasses = StorageClassFactory()
214 # Intentionally invoke property setter to initialize defaults. This
215 # can only be done after most of the rest of Registry has already been
216 # initialized, and must be done before the property getter is used.
217 self.defaults = defaults
218 # In the future DatasetIdFactory may become configurable and this
219 # instance will need to be shared with datasets manager.
220 self.datasetIdFactory = DatasetIdFactory()
222 def __str__(self) -> str:
223 return str(self._db)
225 def __repr__(self) -> str:
226 return f"SqlRegistry({self._db!r}, {self.dimensions!r})"
228 def isWriteable(self) -> bool:
229 # Docstring inherited from lsst.daf.butler.registry.Registry
230 return self._db.isWriteable()
232 def copy(self, defaults: RegistryDefaults | None = None) -> Registry:
233 # Docstring inherited from lsst.daf.butler.registry.Registry
234 if defaults is None:
235 # No need to copy, because `RegistryDefaults` is immutable; we
236 # effectively copy on write.
237 defaults = self.defaults
238 return type(self)(self._db, defaults, self._managers)
240 @property
241 def dimensions(self) -> DimensionUniverse:
242 # Docstring inherited from lsst.daf.butler.registry.Registry
243 return self._managers.dimensions.universe
245 def refresh(self) -> None:
246 # Docstring inherited from lsst.daf.butler.registry.Registry
247 with self._db.transaction():
248 self._managers.refresh()
250 @contextlib.contextmanager
251 def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
252 # Docstring inherited from lsst.daf.butler.registry.Registry
253 try:
254 with self._db.transaction(savepoint=savepoint):
255 yield
256 except BaseException:
257 # TODO: this clears the caches sometimes when we wouldn't actually
258 # need to. Can we avoid that?
259 self._managers.dimensions.clearCaches()
260 raise
262 def resetConnectionPool(self) -> None:
263 """Reset SQLAlchemy connection pool for `SqlRegistry` database.
265 This operation is useful when using the registry with fork-based
266 multiprocessing. To use the registry across a fork boundary, one must
267 ensure that there are no currently active connections (no session or
268 transaction in progress) and that the connection pool has been reset
269 with this method. It should be called by the child process immediately
270 after the fork.
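        Examples
        --------
        A sketch of the intended ``fork``-based pattern, assuming no session
        or transaction is open at fork time:

        >>> import os  # doctest: +SKIP
        >>> if os.fork() == 0:  # doctest: +SKIP
        ...     # Child process: discard connections inherited from the parent.
        ...     registry.resetConnectionPool()
        ...     # ... use ``registry`` normally from here on ...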
271 """
272 self._db._engine.dispose()
274 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
275 """Add an opaque (to the `Registry`) table for use by a `Datastore` or
276 other data repository client.
278 Opaque table records can be added via `insertOpaqueData`, retrieved via
279 `fetchOpaqueData`, and removed via `deleteOpaqueData`.
281 Parameters
282 ----------
283 tableName : `str`
284 Logical name of the opaque table. This may differ from the
285 actual name used in the database by a prefix and/or suffix.
286 spec : `ddl.TableSpec`
287 Specification for the table to be added.
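        Examples
        --------
        A sketch of the full opaque-table lifecycle; the table name, the
        pre-built ``spec`` (a `ddl.TableSpec`), and the column names are
        illustrative placeholders:

        >>> registry.registerOpaqueTable("my_records", spec)  # doctest: +SKIP
        >>> registry.insertOpaqueData("my_records", {"dataset_id": dataset_id, "path": "a/b.fits"})  # doctest: +SKIP
        >>> list(registry.fetchOpaqueData("my_records", dataset_id=dataset_id))  # doctest: +SKIP
        >>> registry.deleteOpaqueData("my_records", dataset_id=dataset_id)  # doctest: +SKIP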
288 """
289 self._managers.opaque.register(tableName, spec)
291 @transactional
292 def insertOpaqueData(self, tableName: str, *data: dict) -> None:
293 """Insert records into an opaque table.
295 Parameters
296 ----------
297 tableName : `str`
298 Logical name of the opaque table. Must match the name used in a
299 previous call to `registerOpaqueTable`.
300 data
301 Each additional positional argument is a dictionary that represents
302 a single row to be added.
303 """
304 self._managers.opaque[tableName].insert(*data)
306 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]:
307 """Retrieve records from an opaque table.
309 Parameters
310 ----------
311 tableName : `str`
312 Logical name of the opaque table. Must match the name used in a
313 previous call to `registerOpaqueTable`.
314 where
315 Additional keyword arguments are interpreted as equality
316 constraints that restrict the returned rows (combined with AND);
317 keyword arguments are column names and values are the values they
318 must have.
320 Yields
321 ------
322 row : `dict`
323 A dictionary representing a single result row.
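        Examples
        --------
        Keyword constraints are ANDed together; this sketch (with placeholder
        table and column names) yields only rows matching both values:

        >>> rows = list(  # doctest: +SKIP
        ...     registry.fetchOpaqueData("my_records", dataset_id=dataset_id, path="a/b.fits")
        ... )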
324 """
325 yield from self._managers.opaque[tableName].fetch(**where)
327 @transactional
328 def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
329 """Remove records from an opaque table.
331 Parameters
332 ----------
333 tableName : `str`
334 Logical name of the opaque table. Must match the name used in a
335 previous call to `registerOpaqueTable`.
336 where
337 Additional keyword arguments are interpreted as equality
338 constraints that restrict the deleted rows (combined with AND);
339 keyword arguments are column names and values are the values they
340 must have.
341 """
342 self._managers.opaque[tableName].delete(where.keys(), where)
344 def registerCollection(
345 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None
346 ) -> bool:
347 # Docstring inherited from lsst.daf.butler.registry.Registry
348 _, registered = self._managers.collections.register(name, type, doc=doc)
349 return registered
351 def getCollectionType(self, name: str) -> CollectionType:
352 # Docstring inherited from lsst.daf.butler.registry.Registry
353 return self._managers.collections.find(name).type
355 def _get_collection_record(self, name: str) -> CollectionRecord:
356 # Docstring inherited from lsst.daf.butler.registry.Registry
357 return self._managers.collections.find(name)
359 def registerRun(self, name: str, doc: str | None = None) -> bool:
360 # Docstring inherited from lsst.daf.butler.registry.Registry
361 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc)
362 return registered
364 @transactional
365 def removeCollection(self, name: str) -> None:
366 # Docstring inherited from lsst.daf.butler.registry.Registry
367 self._managers.collections.remove(name)
369 def getCollectionChain(self, parent: str) -> tuple[str, ...]:
370 # Docstring inherited from lsst.daf.butler.registry.Registry
371 record = self._managers.collections.find(parent)
372 if record.type is not CollectionType.CHAINED:
373 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
374 assert isinstance(record, ChainedCollectionRecord)
375 return record.children
377 @transactional
378 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
379 # Docstring inherited from lsst.daf.butler.registry.Registry
380 record = self._managers.collections.find(parent)
381 if record.type is not CollectionType.CHAINED:
382 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
383 assert isinstance(record, ChainedCollectionRecord)
384 children = CollectionWildcard.from_expression(children).require_ordered()
385 if children != record.children or flatten:
386 record.update(self._managers.collections, children, flatten=flatten)
388 def getCollectionParentChains(self, collection: str) -> set[str]:
389 # Docstring inherited from lsst.daf.butler.registry.Registry
390 return {
391 record.name
392 for record in self._managers.collections.getParentChains(
393 self._managers.collections.find(collection).key
394 )
395 }
397 def getCollectionDocumentation(self, collection: str) -> str | None:
398 # Docstring inherited from lsst.daf.butler.registry.Registry
399 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key)
401 def setCollectionDocumentation(self, collection: str, doc: str | None) -> None:
402 # Docstring inherited from lsst.daf.butler.registry.Registry
403 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc)
405 def getCollectionSummary(self, collection: str) -> CollectionSummary:
406 # Docstring inherited from lsst.daf.butler.registry.Registry
407 record = self._managers.collections.find(collection)
408 return self._managers.datasets.getCollectionSummary(record)
410 def registerDatasetType(self, datasetType: DatasetType) -> bool:
411 # Docstring inherited from lsst.daf.butler.registry.Registry
412 _, inserted = self._managers.datasets.register(datasetType)
413 return inserted
415 def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
416 # Docstring inherited from lsst.daf.butler.registry.Registry
418 for datasetTypeExpression in ensure_iterable(name):
419 # Catch any warnings from the caller specifying a component
420 # dataset type. This will result in an error later but the
421 # warning could be confusing when the caller is not querying
422 # anything.
423 with warnings.catch_warnings():
424 warnings.simplefilter("ignore", category=FutureWarning)
425 datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression))
426 if not datasetTypes:
427 _LOG.info("Dataset type %r not defined", datasetTypeExpression)
428 else:
429 for datasetType in datasetTypes:
430 self._managers.datasets.remove(datasetType.name)
431 _LOG.info("Removed dataset type %r", datasetType.name)
433 def getDatasetType(self, name: str) -> DatasetType:
434 # Docstring inherited from lsst.daf.butler.registry.Registry
435 parent_name, component = DatasetType.splitDatasetTypeName(name)
436 storage = self._managers.datasets[parent_name]
437 if component is None:
438 return storage.datasetType
439 else:
440 return storage.datasetType.makeComponentDatasetType(component)
442 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
443 # Docstring inherited from lsst.daf.butler.registry.Registry
444 return self._managers.datasets.supportsIdGenerationMode(mode)
446 def findDataset(
447 self,
448 datasetType: DatasetType | str,
449 dataId: DataId | None = None,
450 *,
451 collections: CollectionArgType | None = None,
452 timespan: Timespan | None = None,
453 **kwargs: Any,
454 ) -> DatasetRef | None:
455 # Docstring inherited from lsst.daf.butler.registry.Registry
456 if collections is None:
457 if not self.defaults.collections:
458 raise NoDefaultCollectionError(
459 "No collections provided to findDataset, and no defaults from registry construction."
460 )
461 collections = self.defaults.collections
462 backend = queries.SqlQueryBackend(self._db, self._managers)
463 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True)
464 if collection_wildcard.empty():
465 return None
466 matched_collections = backend.resolve_collection_wildcard(collection_wildcard)
467 parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard(
468 datasetType, components_deprecated=False
469 )
470 if len(components) > 1:
471 raise DatasetTypeError(
472 f"findDataset requires exactly one dataset type; got multiple components {components} "
473 f"for parent dataset type {parent_dataset_type.name}."
474 )
475 component = components[0]
476 dataId = DataCoordinate.standardize(
477 dataId,
478 graph=parent_dataset_type.dimensions,
479 universe=self.dimensions,
480 defaults=self.defaults.dataId,
481 **kwargs,
482 )
483 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names}
484 (filtered_collections,) = backend.filter_dataset_collections(
485 [parent_dataset_type],
486 matched_collections,
487 governor_constraints=governor_constraints,
488 ).values()
489 if not filtered_collections:
490 return None
491 if timespan is None:
492 filtered_collections = [
493 collection_record
494 for collection_record in filtered_collections
495 if collection_record.type is not CollectionType.CALIBRATION
496 ]
497 if filtered_collections:
498 requested_columns = {"dataset_id", "run", "collection"}
499 with backend.context() as context:
500 predicate = context.make_data_coordinate_predicate(
501 dataId.subset(parent_dataset_type.dimensions), full=False
502 )
503 if timespan is not None:
504 requested_columns.add("timespan")
505 predicate = predicate.logical_and(
506 context.make_timespan_overlap_predicate(
507 DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan
508 )
509 )
510 relation = backend.make_dataset_query_relation(
511 parent_dataset_type, filtered_collections, requested_columns, context
512 ).with_rows_satisfying(predicate)
513 rows = list(context.fetch_iterable(relation))
514 else:
515 rows = []
516 if not rows:
517 return None
518 elif len(rows) == 1:
519 best_row = rows[0]
520 else:
521 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)}
522 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
523 row_iter = iter(rows)
524 best_row = next(row_iter)
525 best_rank = rank_by_collection_key[best_row[collection_tag]]
526 have_tie = False
527 for row in row_iter:
528 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank:
529 best_row = row
530 best_rank = rank
531 have_tie = False
532 elif rank == best_rank:
533 have_tie = True
534 assert timespan is not None, "Rank ties should be impossible given DB constraints."
535 if have_tie:
536 raise LookupError(
537 f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections "
538 f"{collection_wildcard.strings} with timespan {timespan}."
539 )
540 reader = queries.DatasetRefReader(
541 parent_dataset_type,
542 translate_collection=lambda k: self._managers.collections[k].name,
543 )
544 ref = reader.read(best_row, data_id=dataId)
545 if component is not None:
546 ref = ref.makeComponentRef(component)
547 return ref
549 @transactional
550 def insertDatasets(
551 self,
552 datasetType: DatasetType | str,
553 dataIds: Iterable[DataId],
554 run: str | None = None,
555 expand: bool = True,
556 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
557 ) -> list[DatasetRef]:
558 # Docstring inherited from lsst.daf.butler.registry.Registry
559 if isinstance(datasetType, DatasetType):
560 storage = self._managers.datasets.find(datasetType.name)
561 if storage is None:
562 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
563 else:
564 storage = self._managers.datasets.find(datasetType)
565 if storage is None:
566 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.")
567 if run is None:
568 if self.defaults.run is None:
569 raise NoDefaultCollectionError(
570 "No run provided to insertDatasets, and no default from registry construction."
571 )
572 run = self.defaults.run
573 runRecord = self._managers.collections.find(run)
574 if runRecord.type is not CollectionType.RUN:
575 raise CollectionTypeError(
576 f"Given collection is of type {runRecord.type.name}; RUN collection required."
577 )
578 assert isinstance(runRecord, RunRecord)
579 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
580 if expand:
581 expandedDataIds = [
582 self.expandDataId(dataId, graph=storage.datasetType.dimensions)
583 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs")
584 ]
585 else:
586 expandedDataIds = [
587 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds
588 ]
589 try:
590 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode))
591 if self._managers.obscore:
592 context = queries.SqlQueryContext(self._db, self._managers.column_types)
593 self._managers.obscore.add_datasets(refs, context)
594 except sqlalchemy.exc.IntegrityError as err:
595 raise ConflictingDefinitionError(
596 "A database constraint failure was triggered by inserting "
597 f"one or more datasets of type {storage.datasetType} into "
598 f"collection '{run}'. "
599 "This probably means a dataset with the same data ID "
600 "and dataset type already exists, but it may also mean a "
601 "dimension row is missing."
602 ) from err
603 return refs
605 @transactional
606 def _importDatasets(
607 self,
608 datasets: Iterable[DatasetRef],
609 expand: bool = True,
610 ) -> list[DatasetRef]:
611 # Docstring inherited from lsst.daf.butler.registry.Registry
612 datasets = list(datasets)
613 if not datasets:
614 # nothing to do
615 return []
617 # find dataset type
618 datasetTypes = {dataset.datasetType for dataset in datasets}
619 if len(datasetTypes) != 1:
620 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}")
621 datasetType = datasetTypes.pop()
623 # get storage handler for this dataset type
624 storage = self._managers.datasets.find(datasetType.name)
625 if storage is None:
626 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
628 # find run name
629 runs = {dataset.run for dataset in datasets}
630 if len(runs) != 1:
631 raise ValueError(f"Multiple run names in input datasets: {runs}")
632 run = runs.pop()
634 runRecord = self._managers.collections.find(run)
635 if runRecord.type is not CollectionType.RUN:
636 raise CollectionTypeError(
637 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};"
638 " RUN collection required."
639 )
640 assert isinstance(runRecord, RunRecord)
642 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
643 if expand:
644 expandedDatasets = [
645 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions))
646 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs")
647 ]
648 else:
649 expandedDatasets = [
650 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True)
651 for dataset in datasets
652 ]
654 try:
655 refs = list(storage.import_(runRecord, expandedDatasets))
656 if self._managers.obscore:
657 context = queries.SqlQueryContext(self._db, self._managers.column_types)
658 self._managers.obscore.add_datasets(refs, context)
659 except sqlalchemy.exc.IntegrityError as err:
660 raise ConflictingDefinitionError(
661 "A database constraint failure was triggered by inserting "
662 f"one or more datasets of type {storage.datasetType} into "
663 f"collection '{run}'. "
664 "This probably means a dataset with the same data ID "
665 "and dataset type already exists, but it may also mean a "
666 "dimension row is missing."
667 ) from err
668 # Check that imported dataset IDs match the input
669 for imported_ref, input_ref in zip(refs, datasets):
670 if imported_ref.id != input_ref.id:
671 raise RegistryConsistencyError(
672 "Imported dataset ID differs from input dataset ID, "
673 f"input ref: {input_ref}, imported ref: {imported_ref}"
674 )
675 return refs
677 def getDataset(self, id: DatasetId) -> DatasetRef | None:
678 # Docstring inherited from lsst.daf.butler.registry.Registry
679 return self._managers.datasets.getDatasetRef(id)
681 @transactional
682 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
683 # Docstring inherited from lsst.daf.butler.registry.Registry
684 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG)
685 for datasetType, refsForType in progress.iter_item_chunks(
686 DatasetRef.groupByType(refs).items(), desc="Removing datasets by type"
687 ):
688 storage = self._managers.datasets[datasetType.name]
689 try:
690 storage.delete(refsForType)
691 except sqlalchemy.exc.IntegrityError as err:
692 raise OrphanedRecordError(
693 "One or more datasets is still present in one or more Datastores."
694 ) from err
696 @transactional
697 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
698 # Docstring inherited from lsst.daf.butler.registry.Registry
699 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG)
700 collectionRecord = self._managers.collections.find(collection)
701 if collectionRecord.type is not CollectionType.TAGGED:
702 raise CollectionTypeError(
703 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED."
704 )
705 for datasetType, refsForType in progress.iter_item_chunks(
706 DatasetRef.groupByType(refs).items(), desc="Associating datasets by type"
707 ):
708 storage = self._managers.datasets[datasetType.name]
709 try:
710 storage.associate(collectionRecord, refsForType)
711 if self._managers.obscore:
712 # If a TAGGED collection is being monitored by ObsCore
713 # manager then we may need to save the dataset.
714 context = queries.SqlQueryContext(self._db, self._managers.column_types)
715 self._managers.obscore.associate(refsForType, collectionRecord, context)
716 except sqlalchemy.exc.IntegrityError as err:
717 raise ConflictingDefinitionError(
718 f"Constraint violation while associating dataset of type {datasetType.name} with "
719 f"collection {collection}. This probably means that one or more datasets with the same "
720 "dataset type and data ID already exist in the collection, but it may also indicate "
721 "that the datasets do not exist."
722 ) from err
724 @transactional
725 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
726 # Docstring inherited from lsst.daf.butler.registry.Registry
727 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG)
728 collectionRecord = self._managers.collections.find(collection)
729 if collectionRecord.type is not CollectionType.TAGGED:
730 raise CollectionTypeError(
731 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED."
732 )
733 for datasetType, refsForType in progress.iter_item_chunks(
734 DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type"
735 ):
736 storage = self._managers.datasets[datasetType.name]
737 storage.disassociate(collectionRecord, refsForType)
738 if self._managers.obscore:
739 self._managers.obscore.disassociate(refsForType, collectionRecord)
741 @transactional
742 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
743 # Docstring inherited from lsst.daf.butler.registry.Registry
744 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG)
745 collectionRecord = self._managers.collections.find(collection)
746 for datasetType, refsForType in progress.iter_item_chunks(
747 DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type"
748 ):
749 storage = self._managers.datasets[datasetType.name]
750 storage.certify(
751 collectionRecord,
752 refsForType,
753 timespan,
754 context=queries.SqlQueryContext(self._db, self._managers.column_types),
755 )
757 @transactional
758 def decertify(
759 self,
760 collection: str,
761 datasetType: str | DatasetType,
762 timespan: Timespan,
763 *,
764 dataIds: Iterable[DataId] | None = None,
765 ) -> None:
766 # Docstring inherited from lsst.daf.butler.registry.Registry
767 collectionRecord = self._managers.collections.find(collection)
768 if isinstance(datasetType, str):
769 storage = self._managers.datasets[datasetType]
770 else:
771 storage = self._managers.datasets[datasetType.name]
772 standardizedDataIds = None
773 if dataIds is not None:
774 standardizedDataIds = [
775 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds
776 ]
777 storage.decertify(
778 collectionRecord,
779 timespan,
780 dataIds=standardizedDataIds,
781 context=queries.SqlQueryContext(self._db, self._managers.column_types),
782 )
784 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
785 """Return an object that allows a new `Datastore` instance to
786 communicate with this `Registry`.
788 Returns
789 -------
790 manager : `DatastoreRegistryBridgeManager`
791 Object that mediates communication between this `Registry` and its
792 associated datastores.
793 """
794 return self._managers.datastores
796 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
797 # Docstring inherited from lsst.daf.butler.registry.Registry
798 return self._managers.datastores.findDatastores(ref)
800 def expandDataId(
801 self,
802 dataId: DataId | None = None,
803 *,
804 graph: DimensionGraph | None = None,
805 records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None,
806 withDefaults: bool = True,
807 **kwargs: Any,
808 ) -> DataCoordinate:
809 # Docstring inherited from lsst.daf.butler.registry.Registry
810 if not withDefaults:
811 defaults = None
812 else:
813 defaults = self.defaults.dataId
814 try:
815 standardized = DataCoordinate.standardize(
816 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs
817 )
818 except KeyError as exc:
819 # This means either kwargs have some odd name or required
820 # dimension is missing.
821 raise DimensionNameError(str(exc)) from exc
822 if standardized.hasRecords():
823 return standardized
824 if records is None:
825 records = {}
826 elif isinstance(records, NamedKeyMapping):
827 records = records.byName()
828 else:
829 records = dict(records)
830 if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
831 records.update(dataId.records.byName())
832 keys = standardized.byName()
833 context = queries.SqlQueryContext(self._db, self._managers.column_types)
834 for element in standardized.graph.primaryKeyTraversalOrder:
835 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL
836 if record is ...:
837 if isinstance(element, Dimension) and keys.get(element.name) is None:
838 if element in standardized.graph.required:
839 raise DimensionNameError(
840 f"No value or null value for required dimension {element.name}."
841 )
842 keys[element.name] = None
843 record = None
844 else:
845 storage = self._managers.dimensions[element]
846 record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context)
847 records[element.name] = record
848 if record is not None:
849 for d in element.implied:
850 value = getattr(record, d.name)
851 if keys.setdefault(d.name, value) != value:
852 raise InconsistentDataIdError(
853 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
854 f"but {element.name} implies {d.name}={value!r}."
855 )
856 else:
857 if element in standardized.graph.required:
858 raise DataIdValueError(
859 f"Could not fetch record for required dimension {element.name} via keys {keys}."
860 )
861 if element.alwaysJoin:
862 raise InconsistentDataIdError(
863 f"Could not fetch record for element {element.name} via keys {keys}, ",
864 "but it is marked alwaysJoin=True; this means one or more dimensions are not "
865 "related.",
866 )
867 for d in element.implied:
868 keys.setdefault(d.name, None)
869 records.setdefault(d.name, None)
870 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records)
872 def insertDimensionData(
873 self,
874 element: DimensionElement | str,
875 *data: Mapping[str, Any] | DimensionRecord,
876 conform: bool = True,
877 replace: bool = False,
878 skip_existing: bool = False,
879 ) -> None:
880 # Docstring inherited from lsst.daf.butler.registry.Registry
881 if conform:
882 if isinstance(element, str):
883 element = self.dimensions[element]
884 records = [
885 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data
886 ]
887 else:
888 # Ignore typing since caller said to trust them with conform=False.
889 records = data # type: ignore
890 storage = self._managers.dimensions[element]
891 storage.insert(*records, replace=replace, skip_existing=skip_existing)
893 def syncDimensionData(
894 self,
895 element: DimensionElement | str,
896 row: Mapping[str, Any] | DimensionRecord,
897 conform: bool = True,
898 update: bool = False,
899 ) -> bool | dict[str, Any]:
900 # Docstring inherited from lsst.daf.butler.registry.Registry
901 if conform:
902 if isinstance(element, str):
903 element = self.dimensions[element]
904 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row)
905 else:
906 # Ignore typing since caller said to trust them with conform=False.
907 record = row # type: ignore
908 storage = self._managers.dimensions[element]
909 return storage.sync(record, update=update)
911 def queryDatasetTypes(
912 self,
913 expression: Any = ...,
914 *,
915 components: bool | None = None,
916 missing: list[str] | None = None,
917 ) -> Iterable[DatasetType]:
918 # Docstring inherited from lsst.daf.butler.registry.Registry
919 wildcard = DatasetTypeWildcard.from_expression(expression)
920 composition_dict = self._managers.datasets.resolve_wildcard(
921 wildcard,
922 components=components,
923 missing=missing,
924 )
925 result: list[DatasetType] = []
926 for parent_dataset_type, components_for_parent in composition_dict.items():
927 result.extend(
928 parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type
929 for c in components_for_parent
930 )
931 return result
933 def queryCollections(
934 self,
935 expression: Any = ...,
936 datasetType: DatasetType | None = None,
937 collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(),
938 flattenChains: bool = False,
939 includeChains: bool | None = None,
940 ) -> Sequence[str]:
941 # Docstring inherited from lsst.daf.butler.registry.Registry
943 # Right now the datasetTypes argument is completely ignored, but that
944 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up
945 # ticket will take care of that.
946 try:
947 wildcard = CollectionWildcard.from_expression(expression)
948 except TypeError as exc:
949 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc
950 collectionTypes = ensure_iterable(collectionTypes)
951 return [
952 record.name
953 for record in self._managers.collections.resolve_wildcard(
954 wildcard,
955 collection_types=frozenset(collectionTypes),
956 flatten_chains=flattenChains,
957 include_chains=includeChains,
958 )
959 ]
961 def _makeQueryBuilder(
962 self,
963 summary: queries.QuerySummary,
964 doomed_by: Iterable[str] = (),
965 ) -> queries.QueryBuilder:
966 """Return a `QueryBuilder` instance capable of constructing and
967 managing more complex queries than those obtainable via `Registry`
968 interfaces.
970 This is an advanced interface; downstream code should prefer
971 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
972 are sufficient.
974 Parameters
975 ----------
976 summary : `queries.QuerySummary`
977 Object describing and categorizing the full set of dimensions that
978 will be included in the query.
979 doomed_by : `~collections.abc.Iterable` of `str`, optional
980 A list of diagnostic messages that indicate why the query is going
981 to yield no results and should not even be executed. If an empty
982 container (default) the query will be executed unless other code
983 determines that it is doomed.
985 Returns
986 -------
987 builder : `queries.QueryBuilder`
988 Object that can be used to construct and perform advanced queries.
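        Examples
        --------
        A sketch mirroring how `queryDataIds` uses the builder internally,
        assuming ``summary`` is a pre-built `queries.QuerySummary` and
        ``dataset_type`` / ``collection_wildcard`` have been resolved as in
        `queryDatasets`:

        >>> builder = self._makeQueryBuilder(summary)  # doctest: +SKIP
        >>> builder.joinDataset(dataset_type, collection_wildcard, isResult=False)  # doctest: +SKIP
        >>> query = builder.finish()  # doctest: +SKIP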
989 """
990 doomed_by = list(doomed_by)
991 backend = queries.SqlQueryBackend(self._db, self._managers)
992 context = backend.context()
993 relation: Relation | None = None
994 if doomed_by:
995 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by)
996 return queries.QueryBuilder(
997 summary,
998 backend=backend,
999 context=context,
1000 relation=relation,
1001 )
1003 def _standardize_query_data_id_args(
1004 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any
1005 ) -> DataCoordinate:
1006 """Preprocess the data ID arguments passed to query* methods.
1008 Parameters
1009 ----------
1010 data_id : `DataId` or `None`
1011 Data ID that constrains the query results.
1012 doomed_by : `list` [ `str` ]
1013 List to append messages indicating why the query is doomed to
1014 yield no results.
1015 **kwargs
1016 Additional data ID key-value pairs, extending and overriding
1017 ``data_id``.
1019 Returns
1020 -------
1021 data_id : `DataCoordinate`
1022 Standardized data ID. Will be fully expanded unless expansion
1023 fails, in which case a message will be appended to ``doomed_by``
1024 on return.
1025 """
1026 try:
1027 return self.expandDataId(data_id, **kwargs)
1028 except DataIdValueError as err:
1029 doomed_by.append(str(err))
1030 return DataCoordinate.standardize(
1031 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId
1032 )
1034 def _standardize_query_dataset_args(
1035 self,
1036 datasets: Any,
1037 collections: CollectionArgType | None,
1038 components: bool | None,
1039 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain",
1040 *,
1041 doomed_by: list[str],
1042 ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]:
1043 """Preprocess dataset arguments passed to query* methods.
1045 Parameters
1046 ----------
1047 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these
1048 Expression identifying dataset types. See `queryDatasetTypes` for
1049 details.
1050 collections : `str`, `re.Pattern`, or iterable of these
1051 Expression identifying collections to be searched. See
1052 `queryCollections` for details.
1053 components : `bool`, optional
1054 If `True`, apply all expression patterns to component dataset type
1055 names as well. If `False`, never apply patterns to components.
1056 If `None` (default), apply patterns to components only if their
1057 parent datasets were not matched by the expression.
1058 Fully-specified component datasets (`str` or `DatasetType`
1059 instances) are always included.
1061 Values other than `False` are deprecated, and only `False` will be
1062 supported after v26. After v27 this argument will be removed
1063 entirely.
1064 mode : `str`, optional
1065 The way in which datasets are being used in this query; one of:
1067 - "find_first": this is a query for the first dataset in an
1068 ordered list of collections. Prohibits collection wildcards,
1069 but permits dataset type wildcards.
1071 - "find_all": this is a query for all datasets in all matched
1072 collections. Permits collection and dataset type wildcards.
1074 - "constrain": this is a query for something other than datasets,
1075 with results constrained by dataset existence. Permits
1076 collection wildcards and prohibits ``...`` as a dataset type
1077 wildcard.
1078 doomed_by : `list` [ `str` ]
1079 List to append messages indicating why the query is doomed to
1080 yield no results.
1082 Returns
1083 -------
1084 composition : `dict` [ `DatasetType`, `list` [ `str` or `None` ] ]
1085 Dictionary mapping parent dataset type to `list` of components
1086 matched for that dataset type (or `None` for the parent itself).
1087 collections : `CollectionWildcard` or `None`
1088 Processed collection expression.
1089 """
1090 composition: dict[DatasetType, list[str | None]] = {}
1091 collection_wildcard: CollectionWildcard | None = None
1092 if datasets is not None:
1093 if collections is None:
1094 if not self.defaults.collections:
1095 raise NoDefaultCollectionError("No collections, and no registry default collections.")
1096 collection_wildcard = CollectionWildcard.from_expression(self.defaults.collections)
1097 else:
1098 collection_wildcard = CollectionWildcard.from_expression(collections)
1099 if mode == "find_first" and collection_wildcard.patterns:
1100 raise TypeError(
1101 f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context."
1102 )
1103 missing: list[str] = []
1104 composition = self._managers.datasets.resolve_wildcard(
1105 datasets, components=components, missing=missing, explicit_only=(mode == "constrain")
1106 )
1107 if missing and mode == "constrain":
1108 # After v26 this should raise MissingDatasetTypeError, to be
1109 # implemented on DM-36303.
1110 warnings.warn(
1111 f"Dataset type(s) {missing} are not registered; this will be an error after v26.",
1112 FutureWarning,
1113 stacklevel=find_outside_stacklevel("lsst.daf.butler"),
1114 )
1115 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing)
1116 elif collections:
1117 # I think this check should actually be `collections is not None`,
1118 # but it looks like some CLI scripts use empty tuple as default.
1119 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.")
1120 return composition, collection_wildcard
1122 def queryDatasets(
1123 self,
1124 datasetType: Any,
1125 *,
1126 collections: CollectionArgType | None = None,
1127 dimensions: Iterable[Dimension | str] | None = None,
1128 dataId: DataId | None = None,
1129 where: str = "",
1130 findFirst: bool = False,
1131 components: bool | None = None,
1132 bind: Mapping[str, Any] | None = None,
1133 check: bool = True,
1134 **kwargs: Any,
1135 ) -> queries.DatasetQueryResults:
1136 # Docstring inherited from lsst.daf.butler.registry.Registry
1137 doomed_by: list[str] = []
1138 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1139 dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
1140 datasetType,
1141 collections,
1142 components,
1143 mode="find_first" if findFirst else "find_all",
1144 doomed_by=doomed_by,
1145 )
1146 if collection_wildcard is not None and collection_wildcard.empty():
1147 doomed_by.append("No datasets can be found because collection list is empty.")
1148 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
1149 parent_results: list[queries.ParentDatasetQueryResults] = []
1150 for parent_dataset_type, components_for_parent in dataset_composition.items():
1151 # The full set of dimensions in the query is the combination of
1152 # those needed for the DatasetType and those explicitly requested,
1153 # if any.
1154 dimension_names = set(parent_dataset_type.dimensions.names)
1155 if dimensions is not None:
1156 dimension_names.update(self.dimensions.extract(dimensions).names)
1157 # Construct the summary structure needed to construct a
1158 # QueryBuilder.
1159 summary = queries.QuerySummary(
1160 requested=DimensionGraph(self.dimensions, names=dimension_names),
1161 column_types=self._managers.column_types,
1162 data_id=data_id,
1163 expression=where,
1164 bind=bind,
1165 defaults=self.defaults.dataId,
1166 check=check,
1167 datasets=[parent_dataset_type],
1168 )
1169 builder = self._makeQueryBuilder(summary)
1170 # Add the dataset subquery to the query, telling the QueryBuilder
1171 # to include the rank of the selected collection in the results
1172 # only if we need to findFirst. Note that if any of the
1173 # collections are actually wildcard expressions, and
1174 # findFirst=True, this will raise TypeError for us.
1175 builder.joinDataset(parent_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst)
1176 query = builder.finish()
1177 parent_results.append(
1178 queries.ParentDatasetQueryResults(
1179 query, parent_dataset_type, components=components_for_parent
1180 )
1181 )
1182 if not parent_results:
1183 doomed_by.extend(
1184 f"No registered dataset type matching {t!r} found, so no matching datasets can "
1185 "exist in any collection."
1186 for t in ensure_iterable(datasetType)
1187 )
1188 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
1189 elif len(parent_results) == 1:
1190 return parent_results[0]
1191 else:
1192 return queries.ChainedDatasetQueryResults(parent_results)
1194 def queryDataIds(
1195 self,
1196 dimensions: Iterable[Dimension | str] | Dimension | str,
1197 *,
1198 dataId: DataId | None = None,
1199 datasets: Any = None,
1200 collections: CollectionArgType | None = None,
1201 where: str = "",
1202 components: bool | None = None,
1203 bind: Mapping[str, Any] | None = None,
1204 check: bool = True,
1205 **kwargs: Any,
1206 ) -> queries.DataCoordinateQueryResults:
1207 # Docstring inherited from lsst.daf.butler.registry.Registry
1208 dimensions = ensure_iterable(dimensions)
1209 requestedDimensions = self.dimensions.extract(dimensions)
1210 doomed_by: list[str] = []
1211 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1212 dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
1213 datasets, collections, components, doomed_by=doomed_by
1214 )
1215 if collection_wildcard is not None and collection_wildcard.empty():
1216 doomed_by.append("No data coordinates can be found because collection list is empty.")
1217 summary = queries.QuerySummary(
1218 requested=requestedDimensions,
1219 column_types=self._managers.column_types,
1220 data_id=data_id,
1221 expression=where,
1222 bind=bind,
1223 defaults=self.defaults.dataId,
1224 check=check,
1225 datasets=dataset_composition.keys(),
1226 )
1227 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
1228 for datasetType in dataset_composition.keys():
1229 builder.joinDataset(datasetType, collection_wildcard, isResult=False)
1230 query = builder.finish()
1232 return queries.DataCoordinateQueryResults(query)
1234 def queryDimensionRecords(
1235 self,
1236 element: DimensionElement | str,
1237 *,
1238 dataId: DataId | None = None,
1239 datasets: Any = None,
1240 collections: CollectionArgType | None = None,
1241 where: str = "",
1242 components: bool | None = None,
1243 bind: Mapping[str, Any] | None = None,
1244 check: bool = True,
1245 **kwargs: Any,
1246 ) -> queries.DimensionRecordQueryResults:
1247 # Docstring inherited from lsst.daf.butler.registry.Registry
1248 if not isinstance(element, DimensionElement):
1249 try:
1250 element = self.dimensions[element]
1251 except KeyError as e:
1252 raise DimensionNameError(
1253 f"No such dimension '{element}', available dimensions: "
1254 + str(self.dimensions.getStaticElements())
1255 ) from e
1256 doomed_by: list[str] = []
1257 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1258 dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
1259 datasets, collections, components, doomed_by=doomed_by
1260 )
1261 if collection_wildcard is not None and collection_wildcard.empty():
1262 doomed_by.append("No dimension records can be found because collection list is empty.")
1263 summary = queries.QuerySummary(
1264 requested=element.graph,
1265 column_types=self._managers.column_types,
1266 data_id=data_id,
1267 expression=where,
1268 bind=bind,
1269 defaults=self.defaults.dataId,
1270 check=check,
1271 datasets=dataset_composition.keys(),
1272 )
1273 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
1274 for datasetType in dataset_composition.keys():
1275 builder.joinDataset(datasetType, collection_wildcard, isResult=False)
1276 query = builder.finish().with_record_columns(element)
1277 return queries.DatabaseDimensionRecordQueryResults(query, element)
1279 def queryDatasetAssociations(
1280 self,
1281 datasetType: str | DatasetType,
1282 collections: CollectionArgType | None = ...,
1283 *,
1284 collectionTypes: Iterable[CollectionType] = CollectionType.all(),
1285 flattenChains: bool = False,
1286 ) -> Iterator[DatasetAssociation]:
1287 # Docstring inherited from lsst.daf.butler.registry.Registry
1288 if collections is None:
1289 if not self.defaults.collections:
1290 raise NoDefaultCollectionError(
1291 "No collections provided to queryDatasetAssociations, "
1292 "and no defaults from registry construction."
1293 )
1294 collections = self.defaults.collections
1295 collection_wildcard = CollectionWildcard.from_expression(collections)
1296 backend = queries.SqlQueryBackend(self._db, self._managers)
1297 parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False)
1298 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan")
1299 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
1300 for parent_collection_record in backend.resolve_collection_wildcard(
1301 collection_wildcard,
1302 collection_types=frozenset(collectionTypes),
1303 flatten_chains=flattenChains,
1304 ):
1305 # Resolve this possibly-chained collection into a list of
1306 # non-CHAINED collections that actually hold datasets of this
1307 # type.
1308 candidate_collection_records = backend.resolve_dataset_collections(
1309 parent_dataset_type,
1310 CollectionWildcard.from_names([parent_collection_record.name]),
1311 allow_calibration_collections=True,
1312 governor_constraints={},
1313 )
1314 if not candidate_collection_records:
1315 continue
1316 with backend.context() as context:
1317 relation = backend.make_dataset_query_relation(
1318 parent_dataset_type,
1319 candidate_collection_records,
1320 columns={"dataset_id", "run", "timespan", "collection"},
1321 context=context,
1322 )
1323 reader = queries.DatasetRefReader(
1324 parent_dataset_type,
1325 translate_collection=lambda k: self._managers.collections[k].name,
1326 full=False,
1327 )
1328 for row in context.fetch_iterable(relation):
1329 ref = reader.read(row)
1330 collection_record = self._managers.collections[row[collection_tag]]
1331 if collection_record.type is CollectionType.CALIBRATION:
1332 timespan = row[timespan_tag]
1333 else:
1334 # For backwards compatibility and (possibly?) user
1335 # convenience we continue to define the timespan of a
1336 # DatasetAssociation row for a non-CALIBRATION
1337 # collection to be None rather than a fully unbounded
1338 # timespan.
1339 timespan = None
1340 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan)
1342 @property
1343 def obsCoreTableManager(self) -> ObsCoreTableManager | None:
1344 # Docstring inherited from lsst.daf.butler.registry.Registry
1345 return self._managers.obscore
1347 storageClasses: StorageClassFactory
1348 """All storage classes known to the registry (`StorageClassFactory`).
1349 """