Coverage for python/lsst/daf/butler/registries/sql.py: 12%
514 statements
coverage.py v7.2.5, created at 2023-05-09 02:11 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("SqlRegistry",)
26import contextlib
27import logging
28import warnings
29from typing import (
30 TYPE_CHECKING,
31 Any,
32 Dict,
33 Iterable,
34 Iterator,
35 List,
36 Literal,
37 Mapping,
38 Optional,
39 Sequence,
40 Set,
41 Union,
42 cast,
43)
45import sqlalchemy
46from lsst.daf.relation import LeafRelation, Relation
47from lsst.resources import ResourcePathExpression
48from lsst.utils.ellipsis import Ellipsis
49from lsst.utils.iteration import ensure_iterable
51from ..core import (
52 Config,
53 DataCoordinate,
54 DataId,
55 DatasetAssociation,
56 DatasetColumnTag,
57 DatasetId,
58 DatasetIdFactory,
59 DatasetIdGenEnum,
60 DatasetRef,
61 DatasetType,
62 Dimension,
63 DimensionConfig,
64 DimensionElement,
65 DimensionGraph,
66 DimensionRecord,
67 DimensionUniverse,
68 NamedKeyMapping,
69 NameLookupMapping,
70 Progress,
71 StorageClassFactory,
72 Timespan,
73 ddl,
74)
75from ..core.utils import transactional
76from ..registry import (
77 ArgumentError,
78 CollectionExpressionError,
79 CollectionSummary,
80 CollectionType,
81 CollectionTypeError,
82 ConflictingDefinitionError,
83 DataIdValueError,
84 DatasetTypeError,
85 DimensionNameError,
86 InconsistentDataIdError,
87 NoDefaultCollectionError,
88 OrphanedRecordError,
89 Registry,
90 RegistryConfig,
91 RegistryDefaults,
92 queries,
93)
94from ..registry.interfaces import ChainedCollectionRecord, RunRecord
95from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes
96from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard
98if TYPE_CHECKING:
99 from .._butlerConfig import ButlerConfig
100 from ..registry._registry import CollectionArgType
101 from ..registry.interfaces import (
102 CollectionRecord,
103 Database,
104 DatastoreRegistryBridgeManager,
105 ObsCoreTableManager,
106 )
109_LOG = logging.getLogger(__name__)
112class SqlRegistry(Registry):
113 """Registry implementation based on SQLAlchemy.
115 Parameters
116 ----------
117 database : `Database`
118 Database instance to store Registry.
119 defaults : `RegistryDefaults`
120 Default collection search path and/or output `~CollectionType.RUN`
121 collection.
122 managers : `RegistryManagerInstances`
123 All the managers required for this registry.
124 """
126 defaultConfigFile: Optional[str] = None
127 """Path to configuration defaults. Accessed within the ``configs`` resource
128 or relative to a search path. Can be `None` if no defaults are specified.
129 """
131 @classmethod
132 def createFromConfig(
133 cls,
134 config: Optional[Union[RegistryConfig, str]] = None,
135 dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
136 butlerRoot: Optional[ResourcePathExpression] = None,
137 ) -> Registry:
138 """Create registry database and return `SqlRegistry` instance.
140 This method initializes database contents; the database must be empty
141 prior to calling this method.
143 Parameters
144 ----------
145 config : `RegistryConfig` or `str`, optional
146 Registry configuration; if missing, the default configuration is
147 loaded from ``registry.yaml``.
148 dimensionConfig : `DimensionConfig` or `str`, optional
149 Dimensions configuration; if missing, the default configuration is
150 loaded from ``dimensions.yaml``.
151 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
152 Path to the repository root this `SqlRegistry` will manage.
154 Returns
155 -------
156 registry : `SqlRegistry`
157 A new `SqlRegistry` instance.
158 """
159 config = cls.forceRegistryConfig(config)
160 config.replaceRoot(butlerRoot)
162 if isinstance(dimensionConfig, str):
163 dimensionConfig = DimensionConfig(dimensionConfig)
164 elif dimensionConfig is None:
165 dimensionConfig = DimensionConfig()
166 elif not isinstance(dimensionConfig, DimensionConfig):
167 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}")
169 DatabaseClass = config.getDatabaseClass()
170 database = DatabaseClass.fromUri(
171 str(config.connectionString), origin=config.get("origin", 0), namespace=config.get("namespace")
172 )
173 managerTypes = RegistryManagerTypes.fromConfig(config)
174 managers = managerTypes.makeRepo(database, dimensionConfig)
175 return cls(database, RegistryDefaults(), managers)
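# Example (a hedged sketch, not part of the original source): creating a brand-new
# registry database from configuration via the classmethod above. The configuration
# file name and repository root path are hypothetical.
#
#     registry = SqlRegistry.createFromConfig(
#         "registry.yaml", dimensionConfig=None, butlerRoot="/path/to/repo"
#     )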
177 @classmethod
178 def fromConfig(
179 cls,
180 config: Union[ButlerConfig, RegistryConfig, Config, str],
181 butlerRoot: Optional[ResourcePathExpression] = None,
182 writeable: bool = True,
183 defaults: Optional[RegistryDefaults] = None,
184 ) -> Registry:
185 """Create `Registry` subclass instance from `config`.
187 Registry database must be initialized prior to calling this method.
189 Parameters
190 ----------
191 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
192 Registry configuration.
193 butlerRoot : `lsst.resources.ResourcePathExpression`, optional
194 Path to the repository root this `Registry` will manage.
195 writeable : `bool`, optional
196 If `True` (default) create a read-write connection to the database.
197 defaults : `RegistryDefaults`, optional
198 Default collection search path and/or output `~CollectionType.RUN`
199 collection.
201 Returns
202 -------
203 registry : `SqlRegistry` (subclass)
204 A new `SqlRegistry` subclass instance.
205 """
206 config = cls.forceRegistryConfig(config)
207 config.replaceRoot(butlerRoot)
208 DatabaseClass = config.getDatabaseClass()
209 database = DatabaseClass.fromUri(
210 config.connectionString.render_as_string(hide_password=False),
211 origin=config.get("origin", 0),
212 namespace=config.get("namespace"),
213 writeable=writeable,
214 )
215 managerTypes = RegistryManagerTypes.fromConfig(config)
216 with database.session():
217 managers = managerTypes.loadRepo(database)
218 if defaults is None:
219 defaults = RegistryDefaults()
220 return cls(database, defaults, managers)
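# Example (a hedged sketch): connecting to an existing registry read-only with custom
# defaults. The configuration file name, the run collection name, and the
# ``RegistryDefaults(run=...)`` constructor argument are assumptions, not taken from
# this module.
#
#     defaults = RegistryDefaults(run="u/someone/run")
#     registry = SqlRegistry.fromConfig("registry.yaml", writeable=False, defaults=defaults)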
222 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances):
223 self._db = database
224 self._managers = managers
225 self.storageClasses = StorageClassFactory()
226 # Intentionally invoke property setter to initialize defaults. This
227 # can only be done after most of the rest of Registry has already been
228 # initialized, and must be done before the property getter is used.
229 self.defaults = defaults
230 # In the future DatasetIdFactory may become configurable and this
231 # instance will need to be shared with the datasets manager.
232 self.datasetIdFactory = DatasetIdFactory()
234 def __str__(self) -> str:
235 return str(self._db)
237 def __repr__(self) -> str:
238 return f"SqlRegistry({self._db!r}, {self.dimensions!r})"
240 def isWriteable(self) -> bool:
241 # Docstring inherited from lsst.daf.butler.registry.Registry
242 return self._db.isWriteable()
244 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
245 # Docstring inherited from lsst.daf.butler.registry.Registry
246 if defaults is None:
247 # No need to copy, because `RegistryDefaults` is immutable; we
248 # effectively copy on write.
249 defaults = self.defaults
250 return type(self)(self._db, defaults, self._managers)
252 @property
253 def dimensions(self) -> DimensionUniverse:
254 # Docstring inherited from lsst.daf.butler.registry.Registry
255 return self._managers.dimensions.universe
257 def refresh(self) -> None:
258 # Docstring inherited from lsst.daf.butler.registry.Registry
259 with self._db.transaction():
260 self._managers.refresh()
262 @contextlib.contextmanager
263 def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
264 # Docstring inherited from lsst.daf.butler.registry.Registry
265 try:
266 with self._db.transaction(savepoint=savepoint):
267 yield
268 except BaseException:
269 # TODO: this clears the caches sometimes when we wouldn't actually
270 # need to. Can we avoid that?
271 self._managers.dimensions.clearCaches()
272 raise
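# Example (a hedged sketch): grouping several registry operations in one transaction so
# they succeed or fail together. The collection names are hypothetical; the methods used
# are the ones defined later in this class.
#
#     with registry.transaction(savepoint=True):
#         registry.registerRun("u/someone/run")
#         registry.registerCollection("u/someone/tagged", CollectionType.TAGGED)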
274 def resetConnectionPool(self) -> None:
275 """Reset SQLAlchemy connection pool for `SqlRegistry` database.
277 This operation is useful when using the registry with fork-based
278 multiprocessing. To use the registry across a fork boundary, one has to
279 make sure that there are no currently active connections (no session or
280 transaction in progress) and that the connection pool is reset using this
281 method. This method should be called by the child process immediately
282 after the fork.
283 """
284 self._db._engine.dispose()
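# Example (a hedged sketch): resetting the pool in a child process created with
# fork-based multiprocessing, as the docstring above recommends. The worker function
# and process setup are illustrative only.
#
#     import multiprocessing
#
#     def worker(registry):
#         registry.resetConnectionPool()  # first thing after the fork
#         ...  # use the registry normally from here on
#
#     multiprocessing.get_context("fork").Process(target=worker, args=(registry,)).start()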
286 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
287 """Add an opaque (to the `Registry`) table for use by a `Datastore` or
288 other data repository client.
290 Opaque table records can be added via `insertOpaqueData`, retrieved via
291 `fetchOpaqueData`, and removed via `deleteOpaqueData`.
293 Parameters
294 ----------
295 tableName : `str`
296 Logical name of the opaque table. This may differ from the
297 actual name used in the database by a prefix and/or suffix.
298 spec : `ddl.TableSpec`
299 Specification for the table to be added.
300 """
301 self._managers.opaque.register(tableName, spec)
303 @transactional
304 def insertOpaqueData(self, tableName: str, *data: dict) -> None:
305 """Insert records into an opaque table.
307 Parameters
308 ----------
309 tableName : `str`
310 Logical name of the opaque table. Must match the name used in a
311 previous call to `registerOpaqueTable`.
312 data
313 Each additional positional argument is a dictionary that represents
314 a single row to be added.
315 """
316 self._managers.opaque[tableName].insert(*data)
318 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]:
319 """Retrieve records from an opaque table.
321 Parameters
322 ----------
323 tableName : `str`
324 Logical name of the opaque table. Must match the name used in a
325 previous call to `registerOpaqueTable`.
326 where
327 Additional keyword arguments are interpreted as equality
328 constraints that restrict the returned rows (combined with AND);
329 keyword arguments are column names and values are the values they
330 must have.
332 Yields
333 ------
334 row : `dict`
335 A dictionary representing a single result row.
336 """
337 yield from self._managers.opaque[tableName].fetch(**where)
339 @transactional
340 def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
341 """Remove records from an opaque table.
343 Parameters
344 ----------
345 tableName : `str`
346 Logical name of the opaque table. Must match the name used in a
347 previous call to `registerOpaqueTable`.
348 where
349 Additional keyword arguments are interpreted as equality
350 constraints that restrict the deleted rows (combined with AND);
351 keyword arguments are column names and values are the values they
352 must have.
353 """
354 self._managers.opaque[tableName].delete(where.keys(), where)
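# Example (a hedged sketch): the opaque-table round trip described by the three methods
# above. The table name, the column specification, and the exact `ddl.FieldSpec`
# arguments are assumptions for illustration.
#
#     spec = ddl.TableSpec(fields=[ddl.FieldSpec("path", dtype=sqlalchemy.String, length=256)])
#     registry.registerOpaqueTable("my_datastore_records", spec)
#     registry.insertOpaqueData("my_datastore_records", {"path": "a/b/c.fits"})
#     rows = list(registry.fetchOpaqueData("my_datastore_records", path="a/b/c.fits"))
#     registry.deleteOpaqueData("my_datastore_records", path="a/b/c.fits")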
356 def registerCollection(
357 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None
358 ) -> bool:
359 # Docstring inherited from lsst.daf.butler.registry.Registry
360 _, registered = self._managers.collections.register(name, type, doc=doc)
361 return registered
363 def getCollectionType(self, name: str) -> CollectionType:
364 # Docstring inherited from lsst.daf.butler.registry.Registry
365 return self._managers.collections.find(name).type
367 def _get_collection_record(self, name: str) -> CollectionRecord:
368 # Docstring inherited from lsst.daf.butler.registry.Registry
369 return self._managers.collections.find(name)
371 def registerRun(self, name: str, doc: Optional[str] = None) -> bool:
372 # Docstring inherited from lsst.daf.butler.registry.Registry
373 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc)
374 return registered
376 @transactional
377 def removeCollection(self, name: str) -> None:
378 # Docstring inherited from lsst.daf.butler.registry.Registry
379 self._managers.collections.remove(name)
381 def getCollectionChain(self, parent: str) -> tuple[str, ...]:
382 # Docstring inherited from lsst.daf.butler.registry.Registry
383 record = self._managers.collections.find(parent)
384 if record.type is not CollectionType.CHAINED:
385 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
386 assert isinstance(record, ChainedCollectionRecord)
387 return record.children
389 @transactional
390 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
391 # Docstring inherited from lsst.daf.butler.registry.Registry
392 record = self._managers.collections.find(parent)
393 if record.type is not CollectionType.CHAINED:
394 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
395 assert isinstance(record, ChainedCollectionRecord)
396 children = CollectionWildcard.from_expression(children).require_ordered()
397 if children != record.children or flatten:
398 record.update(self._managers.collections, children, flatten=flatten)
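# Example (a hedged sketch): building a CHAINED collection that searches two
# hypothetical child collections in order, using the methods defined above.
#
#     registry.registerCollection("my/chain", CollectionType.CHAINED)
#     registry.setCollectionChain("my/chain", ["my/run/2", "my/run/1"])
#     assert registry.getCollectionChain("my/chain") == ("my/run/2", "my/run/1")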
400 def getCollectionParentChains(self, collection: str) -> Set[str]:
401 # Docstring inherited from lsst.daf.butler.registry.Registry
402 return {
403 record.name
404 for record in self._managers.collections.getParentChains(
405 self._managers.collections.find(collection).key
406 )
407 }
409 def getCollectionDocumentation(self, collection: str) -> Optional[str]:
410 # Docstring inherited from lsst.daf.butler.registry.Registry
411 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key)
413 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
414 # Docstring inherited from lsst.daf.butler.registry.Registry
415 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc)
417 def getCollectionSummary(self, collection: str) -> CollectionSummary:
418 # Docstring inherited from lsst.daf.butler.registry.Registry
419 record = self._managers.collections.find(collection)
420 return self._managers.datasets.getCollectionSummary(record)
422 def registerDatasetType(self, datasetType: DatasetType) -> bool:
423 # Docstring inherited from lsst.daf.butler.registry.Registry
424 _, inserted = self._managers.datasets.register(datasetType)
425 return inserted
427 def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
428 # Docstring inherited from lsst.daf.butler.registry.Registry
430 for datasetTypeExpression in ensure_iterable(name):
431 datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression))
432 if not datasetTypes:
433 _LOG.info("Dataset type %r not defined", datasetTypeExpression)
434 else:
435 for datasetType in datasetTypes:
436 self._managers.datasets.remove(datasetType.name)
437 _LOG.info("Removed dataset type %r", datasetType.name)
439 def getDatasetType(self, name: str) -> DatasetType:
440 # Docstring inherited from lsst.daf.butler.registry.Registry
441 parent_name, component = DatasetType.splitDatasetTypeName(name)
442 storage = self._managers.datasets[parent_name]
443 if component is None:
444 return storage.datasetType
445 else:
446 return storage.datasetType.makeComponentDatasetType(component)
448 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
449 # Docstring inherited from lsst.daf.butler.registry.Registry
450 return self._managers.datasets.supportsIdGenerationMode(mode)
452 def findDataset(
453 self,
454 datasetType: Union[DatasetType, str],
455 dataId: Optional[DataId] = None,
456 *,
457 collections: CollectionArgType | None = None,
458 timespan: Optional[Timespan] = None,
459 **kwargs: Any,
460 ) -> Optional[DatasetRef]:
461 # Docstring inherited from lsst.daf.butler.registry.Registry
462 if collections is None:
463 if not self.defaults.collections:
464 raise NoDefaultCollectionError(
465 "No collections provided to findDataset, and no defaults from registry construction."
466 )
467 collections = self.defaults.collections
468 backend = queries.SqlQueryBackend(self._db, self._managers)
469 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True)
470 if collection_wildcard.empty():
471 return None
472 matched_collections = backend.resolve_collection_wildcard(collection_wildcard)
473 parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard(
474 datasetType, components_deprecated=False
475 )
476 if len(components) > 1:
477 raise DatasetTypeError(
478 f"findDataset requires exactly one dataset type; got multiple components {components} "
479 f"for parent dataset type {parent_dataset_type.name}."
480 )
481 component = components[0]
482 dataId = DataCoordinate.standardize(
483 dataId,
484 graph=parent_dataset_type.dimensions,
485 universe=self.dimensions,
486 defaults=self.defaults.dataId,
487 **kwargs,
488 )
489 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names}
490 (filtered_collections,) = backend.filter_dataset_collections(
491 [parent_dataset_type],
492 matched_collections,
493 governor_constraints=governor_constraints,
494 ).values()
495 if not filtered_collections:
496 return None
497 if timespan is None:
498 filtered_collections = [
499 collection_record
500 for collection_record in filtered_collections
501 if collection_record.type is not CollectionType.CALIBRATION
502 ]
503 if filtered_collections:
504 requested_columns = {"dataset_id", "run", "collection"}
505 with backend.context() as context:
506 predicate = context.make_data_coordinate_predicate(
507 dataId.subset(parent_dataset_type.dimensions), full=False
508 )
509 if timespan is not None:
510 requested_columns.add("timespan")
511 predicate = predicate.logical_and(
512 context.make_timespan_overlap_predicate(
513 DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan
514 )
515 )
516 relation = backend.make_dataset_query_relation(
517 parent_dataset_type, filtered_collections, requested_columns, context
518 ).with_rows_satisfying(predicate)
519 rows = list(context.fetch_iterable(relation))
520 else:
521 rows = []
522 if not rows:
523 return None
524 elif len(rows) == 1:
525 best_row = rows[0]
526 else:
527 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)}
528 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
529 row_iter = iter(rows)
530 best_row = next(row_iter)
531 best_rank = rank_by_collection_key[best_row[collection_tag]]
532 have_tie = False
533 for row in row_iter:
534 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank:
535 best_row = row
536 best_rank = rank
537 have_tie = False
538 elif rank == best_rank:
539 have_tie = True
540 assert timespan is not None, "Rank ties should be impossible given DB constraints."
541 if have_tie:
542 raise LookupError(
543 f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections "
544 f"{collection_wildcard.strings} with timespan {timespan}."
545 )
546 reader = queries.DatasetRefReader(
547 parent_dataset_type,
548 translate_collection=lambda k: self._managers.collections[k].name,
549 )
550 ref = reader.read(best_row, data_id=dataId)
551 if component is not None:
552 ref = ref.makeComponentRef(component)
553 return ref
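# Example (a hedged sketch): looking up a single dataset, including a calibration lookup
# constrained by a validity timespan. The dataset type, collection, data ID values, and
# the ``t_start``/``t_end`` placeholder times are hypothetical.
#
#     ref = registry.findDataset(
#         "flat", instrument="Cam", detector=1, physical_filter="g",
#         collections="calib", timespan=Timespan(begin=t_start, end=t_end),
#     )
#     if ref is None:
#         ...  # no matching dataset in the given collections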
555 @transactional
556 def insertDatasets(
557 self,
558 datasetType: Union[DatasetType, str],
559 dataIds: Iterable[DataId],
560 run: Optional[str] = None,
561 expand: bool = True,
562 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
563 ) -> List[DatasetRef]:
564 # Docstring inherited from lsst.daf.butler.registry.Registry
565 if isinstance(datasetType, DatasetType):
566 storage = self._managers.datasets.find(datasetType.name)
567 if storage is None:
568 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
569 else:
570 storage = self._managers.datasets.find(datasetType)
571 if storage is None:
572 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.")
573 if run is None:
574 if self.defaults.run is None:
575 raise NoDefaultCollectionError(
576 "No run provided to insertDatasets, and no default from registry construction."
577 )
578 run = self.defaults.run
579 runRecord = self._managers.collections.find(run)
580 if runRecord.type is not CollectionType.RUN:
581 raise CollectionTypeError(
582 f"Given collection is of type {runRecord.type.name}; RUN collection required."
583 )
584 assert isinstance(runRecord, RunRecord)
585 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
586 if expand:
587 expandedDataIds = [
588 self.expandDataId(dataId, graph=storage.datasetType.dimensions)
589 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs")
590 ]
591 else:
592 expandedDataIds = [
593 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds
594 ]
595 try:
596 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode))
597 if self._managers.obscore:
598 context = queries.SqlQueryContext(self._db, self._managers.column_types)
599 self._managers.obscore.add_datasets(refs, context)
600 except sqlalchemy.exc.IntegrityError as err:
601 raise ConflictingDefinitionError(
602 "A database constraint failure was triggered by inserting "
603 f"one or more datasets of type {storage.datasetType} into "
604 f"collection '{run}'. "
605 "This probably means a dataset with the same data ID "
606 "and dataset type already exists, but it may also mean a "
607 "dimension row is missing."
608 ) from err
609 return refs
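# Example (a hedged sketch): registering a dataset type and inserting new datasets into a
# RUN collection. The names, dimensions, storage class, and the `DatasetType` constructor
# arguments shown are assumptions drawn from typical daf_butler usage, not from this module.
#
#     dataset_type = DatasetType("someCatalog", dimensions=("instrument", "visit"),
#                                storageClass="SourceCatalog", universe=registry.dimensions)
#     registry.registerDatasetType(dataset_type)
#     registry.registerRun("my/run")
#     refs = registry.insertDatasets(dataset_type,
#                                    dataIds=[{"instrument": "Cam", "visit": 42}],
#                                    run="my/run")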
611 @transactional
612 def _importDatasets(
613 self,
614 datasets: Iterable[DatasetRef],
615 expand: bool = True,
616 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
617 reuseIds: bool = False,
618 ) -> List[DatasetRef]:
619 # Docstring inherited from lsst.daf.butler.registry.Registry
620 datasets = list(datasets)
621 if not datasets:
622 # nothing to do
623 return []
625 # find dataset type
626 datasetTypes = set(dataset.datasetType for dataset in datasets)
627 if len(datasetTypes) != 1:
628 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}")
629 datasetType = datasetTypes.pop()
631 # get storage handler for this dataset type
632 storage = self._managers.datasets.find(datasetType.name)
633 if storage is None:
634 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
636 # find run name
637 runs = set(dataset.run for dataset in datasets)
638 if len(runs) != 1:
639 raise ValueError(f"Multiple run names in input datasets: {runs}")
640 run = runs.pop()
641 if run is None:
642 if self.defaults.run is None:
643 raise NoDefaultCollectionError(
644 "No run provided to ingestDatasets, and no default from registry construction."
645 )
646 run = self.defaults.run
648 runRecord = self._managers.collections.find(run)
649 if runRecord.type is not CollectionType.RUN:
650 raise CollectionTypeError(
651 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};"
652 " RUN collection required."
653 )
654 assert isinstance(runRecord, RunRecord)
656 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
657 if expand:
658 expandedDatasets = [
659 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions))
660 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs")
661 ]
662 else:
663 expandedDatasets = [
664 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True)
665 for dataset in datasets
666 ]
668 try:
669 refs = list(storage.import_(runRecord, expandedDatasets, idGenerationMode, reuseIds))
670 if self._managers.obscore:
671 context = queries.SqlQueryContext(self._db, self._managers.column_types)
672 self._managers.obscore.add_datasets(refs, context)
673 except sqlalchemy.exc.IntegrityError as err:
674 raise ConflictingDefinitionError(
675 "A database constraint failure was triggered by inserting "
676 f"one or more datasets of type {storage.datasetType} into "
677 f"collection '{run}'. "
678 "This probably means a dataset with the same data ID "
679 "and dataset type already exists, but it may also mean a "
680 "dimension row is missing."
681 ) from err
682 return refs
684 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
685 # Docstring inherited from lsst.daf.butler.registry.Registry
686 return self._managers.datasets.getDatasetRef(id)
688 @transactional
689 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
690 # Docstring inherited from lsst.daf.butler.registry.Registry
691 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG)
692 for datasetType, refsForType in progress.iter_item_chunks(
693 DatasetRef.groupByType(refs).items(), desc="Removing datasets by type"
694 ):
695 storage = self._managers.datasets[datasetType.name]
696 try:
697 storage.delete(refsForType)
698 except sqlalchemy.exc.IntegrityError as err:
699 raise OrphanedRecordError(
700 "One or more datasets is still present in one or more Datastores."
701 ) from err
703 @transactional
704 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
705 # Docstring inherited from lsst.daf.butler.registry.Registry
706 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG)
707 collectionRecord = self._managers.collections.find(collection)
708 if collectionRecord.type is not CollectionType.TAGGED:
709 raise CollectionTypeError(
710 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED."
711 )
712 for datasetType, refsForType in progress.iter_item_chunks(
713 DatasetRef.groupByType(refs).items(), desc="Associating datasets by type"
714 ):
715 storage = self._managers.datasets[datasetType.name]
716 try:
717 storage.associate(collectionRecord, refsForType)
718 if self._managers.obscore:
719 # If a TAGGED collection is being monitored by ObsCore
720 # manager then we may need to save the dataset.
721 context = queries.SqlQueryContext(self._db, self._managers.column_types)
722 self._managers.obscore.associate(refsForType, collectionRecord, context)
723 except sqlalchemy.exc.IntegrityError as err:
724 raise ConflictingDefinitionError(
725 f"Constraint violation while associating dataset of type {datasetType.name} with "
726 f"collection {collection}. This probably means that one or more datasets with the same "
727 "dataset type and data ID already exist in the collection, but it may also indicate "
728 "that the datasets do not exist."
729 ) from err
731 @transactional
732 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
733 # Docstring inherited from lsst.daf.butler.registry.Registry
734 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG)
735 collectionRecord = self._managers.collections.find(collection)
736 if collectionRecord.type is not CollectionType.TAGGED:
737 raise CollectionTypeError(
738 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED."
739 )
740 for datasetType, refsForType in progress.iter_item_chunks(
741 DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type"
742 ):
743 storage = self._managers.datasets[datasetType.name]
744 storage.disassociate(collectionRecord, refsForType)
745 if self._managers.obscore:
746 self._managers.obscore.disassociate(refsForType, collectionRecord)
748 @transactional
749 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
750 # Docstring inherited from lsst.daf.butler.registry.Registry
751 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG)
752 collectionRecord = self._managers.collections.find(collection)
753 for datasetType, refsForType in progress.iter_item_chunks(
754 DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type"
755 ):
756 storage = self._managers.datasets[datasetType.name]
757 storage.certify(
758 collectionRecord,
759 refsForType,
760 timespan,
761 context=queries.SqlQueryContext(self._db, self._managers.column_types),
762 )
764 @transactional
765 def decertify(
766 self,
767 collection: str,
768 datasetType: Union[str, DatasetType],
769 timespan: Timespan,
770 *,
771 dataIds: Optional[Iterable[DataId]] = None,
772 ) -> None:
773 # Docstring inherited from lsst.daf.butler.registry.Registry
774 collectionRecord = self._managers.collections.find(collection)
775 if isinstance(datasetType, str):
776 storage = self._managers.datasets[datasetType]
777 else:
778 storage = self._managers.datasets[datasetType.name]
779 standardizedDataIds = None
780 if dataIds is not None:
781 standardizedDataIds = [
782 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds
783 ]
784 storage.decertify(
785 collectionRecord,
786 timespan,
787 dataIds=standardizedDataIds,
788 context=queries.SqlQueryContext(self._db, self._managers.column_types),
789 )
791 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
792 """Return an object that allows a new `Datastore` instance to
793 communicate with this `Registry`.
795 Returns
796 -------
797 manager : `DatastoreRegistryBridgeManager`
798 Object that mediates communication between this `Registry` and its
799 associated datastores.
800 """
801 return self._managers.datastores
803 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
804 # Docstring inherited from lsst.daf.butler.registry.Registry
805 return self._managers.datastores.findDatastores(ref)
807 def expandDataId(
808 self,
809 dataId: Optional[DataId] = None,
810 *,
811 graph: Optional[DimensionGraph] = None,
812 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
813 withDefaults: bool = True,
814 **kwargs: Any,
815 ) -> DataCoordinate:
816 # Docstring inherited from lsst.daf.butler.registry.Registry
817 if not withDefaults:
818 defaults = None
819 else:
820 defaults = self.defaults.dataId
821 try:
822 standardized = DataCoordinate.standardize(
823 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs
824 )
825 except KeyError as exc:
826 # This means either a kwarg has an odd name or a required
827 # dimension is missing.
828 raise DimensionNameError(str(exc)) from exc
829 if standardized.hasRecords():
830 return standardized
831 if records is None:
832 records = {}
833 elif isinstance(records, NamedKeyMapping):
834 records = records.byName()
835 else:
836 records = dict(records)
837 if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
838 records.update(dataId.records.byName())
839 keys = standardized.byName()
840 context = queries.SqlQueryContext(self._db, self._managers.column_types)
841 for element in standardized.graph.primaryKeyTraversalOrder:
842 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL
843 if record is ...:
844 if isinstance(element, Dimension) and keys.get(element.name) is None:
845 if element in standardized.graph.required:
846 raise DimensionNameError(
847 f"No value or null value for required dimension {element.name}."
848 )
849 keys[element.name] = None
850 record = None
851 else:
852 storage = self._managers.dimensions[element]
853 record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context)
854 records[element.name] = record
855 if record is not None:
856 for d in element.implied:
857 value = getattr(record, d.name)
858 if keys.setdefault(d.name, value) != value:
859 raise InconsistentDataIdError(
860 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
861 f"but {element.name} implies {d.name}={value!r}."
862 )
863 else:
864 if element in standardized.graph.required:
865 raise DataIdValueError(
866 f"Could not fetch record for required dimension {element.name} via keys {keys}."
867 )
868 if element.alwaysJoin:
869 raise InconsistentDataIdError(
870 f"Could not fetch record for element {element.name} via keys {keys}, ",
871 "but it is marked alwaysJoin=True; this means one or more dimensions are not "
872 "related.",
873 )
874 for d in element.implied:
875 keys.setdefault(d.name, None)
876 records.setdefault(d.name, None)
877 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records)
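# Example (a hedged sketch): expanding a minimal data ID so that implied dimension values
# and records become available. The dimension names and values are hypothetical and depend
# on the configured dimension universe.
#
#     data_id = registry.expandDataId(instrument="Cam", exposure=1234)
#     assert data_id.hasRecords()
#     band = data_id["band"]  # implied value filled in from the fetched records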
879 def insertDimensionData(
880 self,
881 element: Union[DimensionElement, str],
882 *data: Union[Mapping[str, Any], DimensionRecord],
883 conform: bool = True,
884 replace: bool = False,
885 skip_existing: bool = False,
886 ) -> None:
887 # Docstring inherited from lsst.daf.butler.registry.Registry
888 if conform:
889 if isinstance(element, str):
890 element = self.dimensions[element]
891 records = [
892 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data
893 ]
894 else:
895 # Ignore typing since caller said to trust them with conform=False.
896 records = data # type: ignore
897 storage = self._managers.dimensions[element]
898 storage.insert(*records, replace=replace, skip_existing=skip_existing)
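# Example (a hedged sketch): inserting dimension records directly. The element names and
# record fields shown are hypothetical and depend on the dimension universe in use.
#
#     registry.insertDimensionData("instrument", {"name": "Cam", "detector_max": 4})
#     registry.insertDimensionData(
#         "detector", *[{"instrument": "Cam", "id": i, "full_name": f"D{i}"} for i in range(4)]
#     )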
900 def syncDimensionData(
901 self,
902 element: Union[DimensionElement, str],
903 row: Union[Mapping[str, Any], DimensionRecord],
904 conform: bool = True,
905 update: bool = False,
906 ) -> Union[bool, Dict[str, Any]]:
907 # Docstring inherited from lsst.daf.butler.registry.Registry
908 if conform:
909 if isinstance(element, str):
910 element = self.dimensions[element]
911 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row)
912 else:
913 # Ignore typing since caller said to trust them with conform=False.
914 record = row # type: ignore
915 storage = self._managers.dimensions[element]
916 return storage.sync(record, update=update)
918 def queryDatasetTypes(
919 self,
920 expression: Any = ...,
921 *,
922 components: Optional[bool] = None,
923 missing: Optional[List[str]] = None,
924 ) -> Iterable[DatasetType]:
925 # Docstring inherited from lsst.daf.butler.registry.Registry
926 wildcard = DatasetTypeWildcard.from_expression(expression)
927 composition_dict = self._managers.datasets.resolve_wildcard(
928 wildcard,
929 components=components,
930 missing=missing,
931 )
932 result: list[DatasetType] = []
933 for parent_dataset_type, components_for_parent in composition_dict.items():
934 result.extend(
935 parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type
936 for c in components_for_parent
937 )
938 return result
940 def queryCollections(
941 self,
942 expression: Any = ...,
943 datasetType: Optional[DatasetType] = None,
944 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(),
945 flattenChains: bool = False,
946 includeChains: Optional[bool] = None,
947 ) -> Sequence[str]:
948 # Docstring inherited from lsst.daf.butler.registry.Registry
950 # Right now the datasetType argument is completely ignored, but that
951 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up
952 # ticket will take care of that.
953 try:
954 wildcard = CollectionWildcard.from_expression(expression)
955 except TypeError as exc:
956 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc
957 collectionTypes = ensure_iterable(collectionTypes)
958 return [
959 record.name
960 for record in self._managers.collections.resolve_wildcard(
961 wildcard,
962 collection_types=frozenset(collectionTypes),
963 flatten_chains=flattenChains,
964 include_chains=includeChains,
965 )
966 ]
968 def _makeQueryBuilder(
969 self,
970 summary: queries.QuerySummary,
971 doomed_by: Iterable[str] = (),
972 ) -> queries.QueryBuilder:
973 """Return a `QueryBuilder` instance capable of constructing and
974 managing more complex queries than those obtainable via `Registry`
975 interfaces.
977 This is an advanced interface; downstream code should prefer
978 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
979 are sufficient.
981 Parameters
982 ----------
983 summary : `queries.QuerySummary`
984 Object describing and categorizing the full set of dimensions that
985 will be included in the query.
986 doomed_by : `Iterable` of `str`, optional
987 A list of diagnostic messages that indicate why the query is going
988 to yield no results and should not even be executed. If an empty
989 container (default) the query will be executed unless other code
990 determines that it is doomed.
992 Returns
993 -------
994 builder : `queries.QueryBuilder`
995 Object that can be used to construct and perform advanced queries.
996 """
997 doomed_by = list(doomed_by)
998 backend = queries.SqlQueryBackend(self._db, self._managers)
999 context = backend.context()
1000 relation: Relation | None = None
1001 if doomed_by:
1002 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by)
1003 return queries.QueryBuilder(
1004 summary,
1005 backend=backend,
1006 context=context,
1007 relation=relation,
1008 )
1010 def _standardize_query_data_id_args(
1011 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any
1012 ) -> DataCoordinate:
1013 """Preprocess the data ID arguments passed to query* methods.
1015 Parameters
1016 ----------
1017 data_id : `DataId` or `None`
1018 Data ID that constrains the query results.
1019 doomed_by : `list` [ `str` ]
1020 List to append messages indicating why the query is doomed to
1021 yield no results.
1022 **kwargs
1023 Additional data ID key-value pairs, extending and overriding
1024 ``data_id``.
1026 Returns
1027 -------
1028 data_id : `DataCoordinate`
1029 Standardized data ID. Will be fully expanded unless expansion
1030 fails, in which case a message will be appended to ``doomed_by``
1031 on return.
1032 """
1033 try:
1034 return self.expandDataId(data_id, **kwargs)
1035 except DataIdValueError as err:
1036 doomed_by.append(str(err))
1037 return DataCoordinate.standardize(
1038 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId
1039 )
1041 def _standardize_query_dataset_args(
1042 self,
1043 datasets: Any,
1044 collections: CollectionArgType | None,
1045 components: bool | None,
1046 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain",
1047 *,
1048 doomed_by: list[str],
1049 ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]:
1050 """Preprocess dataset arguments passed to query* methods.
1052 Parameters
1053 ----------
1054 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these
1055 Expression identifying dataset types. See `queryDatasetTypes` for
1056 details.
1057 collections : `str`, `re.Pattern`, or iterable of these
1058 Expression identifying collections to be searched. See
1059 `queryCollections` for details.
1060 components : `bool`, optional
1061 If `True`, apply all expression patterns to component dataset type
1062 names as well. If `False`, never apply patterns to components.
1063 If `None` (default), apply patterns to components only if their
1064 parent datasets were not matched by the expression.
1065 Fully-specified component datasets (`str` or `DatasetType`
1066 instances) are always included.
1068 Values other than `False` are deprecated, and only `False` will be
1069 supported after v26. After v27 this argument will be removed
1070 entirely.
1071 mode : `str`, optional
1072 The way in which datasets are being used in this query; one of:
1074 - "find_first": this is a query for the first dataset in an
1075 ordered list of collections. Prohibits collection wildcards,
1076 but permits dataset type wildcards.
1078 - "find_all": this is a query for all datasets in all matched
1079 collections. Permits collection and dataset type wildcards.
1081 - "constrain": this is a query for something other than datasets,
1082 with results constrained by dataset existence. Permits
1083 collection wildcards and prohibits ``...`` as a dataset type
1084 wildcard.
1085 doomed_by : `list` [ `str` ]
1086 List to append messages indicating why the query is doomed to
1087 yield no results.
1089 Returns
1090 -------
1091 composition : `dict` [ `DatasetType`, `list` [ `str` or `None` ] ]
1092 Dictionary mapping parent dataset type to `list` of components
1093 matched for that dataset type (or `None` for the parent itself).
1094 collections : `CollectionWildcard` or `None`
1095 Processed collection expression, or `None` if ``datasets`` is `None`.
1096 """
1097 composition: dict[DatasetType, list[str | None]] = {}
1098 collection_wildcard: CollectionWildcard | None = None
1099 if datasets is not None:
1100 if collections is None:
1101 if not self.defaults.collections:
1102 raise NoDefaultCollectionError("No collections, and no registry default collections.")
1103 collection_wildcard = CollectionWildcard.from_expression(self.defaults.collections)
1104 else:
1105 collection_wildcard = CollectionWildcard.from_expression(collections)
1106 if mode == "find_first" and collection_wildcard.patterns:
1107 raise TypeError(
1108 f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context."
1109 )
1110 missing: list[str] = []
1111 composition = self._managers.datasets.resolve_wildcard(
1112 datasets, components=components, missing=missing, explicit_only=(mode == "constrain")
1113 )
1114 if missing and mode == "constrain":
1115 # After v26 this should raise MissingDatasetTypeError, to be
1116 # implemented on DM-36303.
1117 warnings.warn(
1118 f"Dataset type(s) {missing} are not registered; this will be an error after v26.",
1119 FutureWarning,
1120 )
1121 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing)
1122 elif collections:
1123 # I think this check should actually be `collections is not None`,
1124 # but it looks like some CLI scripts use empty tuple as default.
1125 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.")
1126 return composition, collection_wildcard
1128 def queryDatasets(
1129 self,
1130 datasetType: Any,
1131 *,
1132 collections: CollectionArgType | None = None,
1133 dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
1134 dataId: Optional[DataId] = None,
1135 where: str = "",
1136 findFirst: bool = False,
1137 components: Optional[bool] = None,
1138 bind: Optional[Mapping[str, Any]] = None,
1139 check: bool = True,
1140 **kwargs: Any,
1141 ) -> queries.DatasetQueryResults:
1142 # Docstring inherited from lsst.daf.butler.registry.Registry
1143 doomed_by: list[str] = []
1144 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1145 dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
1146 datasetType,
1147 collections,
1148 components,
1149 mode="find_first" if findFirst else "find_all",
1150 doomed_by=doomed_by,
1151 )
1152 if collection_wildcard is not None and collection_wildcard.empty():
1153 doomed_by.append("No datasets can be found because collection list is empty.")
1154 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
1155 parent_results: list[queries.ParentDatasetQueryResults] = []
1156 for parent_dataset_type, components_for_parent in dataset_composition.items():
1157 # The full set of dimensions in the query is the combination of
1158 # those needed for the DatasetType and those explicitly requested,
1159 # if any.
1160 dimension_names = set(parent_dataset_type.dimensions.names)
1161 if dimensions is not None:
1162 dimension_names.update(self.dimensions.extract(dimensions).names)
1163 # Construct the summary structure needed to construct a
1164 # QueryBuilder.
1165 summary = queries.QuerySummary(
1166 requested=DimensionGraph(self.dimensions, names=dimension_names),
1167 column_types=self._managers.column_types,
1168 data_id=data_id,
1169 expression=where,
1170 bind=bind,
1171 defaults=self.defaults.dataId,
1172 check=check,
1173 datasets=[parent_dataset_type],
1174 )
1175 builder = self._makeQueryBuilder(summary)
1176 # Add the dataset subquery to the query, telling the QueryBuilder
1177 # to include the rank of the selected collection in the results
1178 # only if we need to findFirst. Note that if any of the
1179 # collections are actually wildcard expressions, and
1180 # findFirst=True, this will raise TypeError for us.
1181 builder.joinDataset(parent_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst)
1182 query = builder.finish()
1183 parent_results.append(
1184 queries.ParentDatasetQueryResults(
1185 query, parent_dataset_type, components=components_for_parent
1186 )
1187 )
1188 if not parent_results:
1189 doomed_by.extend(
1190 f"No registered dataset type matching {t!r} found, so no matching datasets can "
1191 "exist in any collection."
1192 for t in ensure_iterable(datasetType)
1193 )
1194 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
1195 elif len(parent_results) == 1:
1196 return parent_results[0]
1197 else:
1198 return queries.ChainedDatasetQueryResults(parent_results)
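# Example (a hedged sketch): querying datasets with a user expression and bind values.
# The dataset type, collection, and dimension names are hypothetical.
#
#     refs = registry.queryDatasets(
#         "someCatalog",
#         collections=["my/run"],
#         where="instrument = inst AND visit > 40",
#         bind={"inst": "Cam"},
#         findFirst=True,
#     )
#     for ref in refs:
#         print(ref.dataId, ref.run)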
1200 def queryDataIds(
1201 self,
1202 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str],
1203 *,
1204 dataId: Optional[DataId] = None,
1205 datasets: Any = None,
1206 collections: CollectionArgType | None = None,
1207 where: str = "",
1208 components: Optional[bool] = None,
1209 bind: Optional[Mapping[str, Any]] = None,
1210 check: bool = True,
1211 **kwargs: Any,
1212 ) -> queries.DataCoordinateQueryResults:
1213 # Docstring inherited from lsst.daf.butler.registry.Registry
1214 dimensions = ensure_iterable(dimensions)
1215 requestedDimensions = self.dimensions.extract(dimensions)
1216 doomed_by: list[str] = []
1217 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1218 dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
1219 datasets, collections, components, doomed_by=doomed_by
1220 )
1221 if collection_wildcard is not None and collection_wildcard.empty():
1222 doomed_by.append("No data coordinates can be found because collection list is empty.")
1223 summary = queries.QuerySummary(
1224 requested=requestedDimensions,
1225 column_types=self._managers.column_types,
1226 data_id=data_id,
1227 expression=where,
1228 bind=bind,
1229 defaults=self.defaults.dataId,
1230 check=check,
1231 datasets=dataset_composition.keys(),
1232 )
1233 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
1234 for datasetType in dataset_composition.keys():
1235 builder.joinDataset(datasetType, collection_wildcard, isResult=False)
1236 query = builder.finish()
1238 return queries.DataCoordinateQueryResults(query)
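# Example (a hedged sketch): querying data IDs constrained by dataset existence. The
# dataset type, collection, and dimension names are hypothetical, and the ``expanded()``
# call is an assumption about the results class.
#
#     data_ids = registry.queryDataIds(
#         ["visit", "detector"],
#         datasets="someCatalog",
#         collections=["my/run"],
#         instrument="Cam",
#     )
#     for data_id in data_ids.expanded():
#         print(data_id)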
1240 def queryDimensionRecords(
1241 self,
1242 element: Union[DimensionElement, str],
1243 *,
1244 dataId: Optional[DataId] = None,
1245 datasets: Any = None,
1246 collections: CollectionArgType | None = None,
1247 where: str = "",
1248 components: Optional[bool] = None,
1249 bind: Optional[Mapping[str, Any]] = None,
1250 check: bool = True,
1251 **kwargs: Any,
1252 ) -> queries.DimensionRecordQueryResults:
1253 # Docstring inherited from lsst.daf.butler.registry.Registry
1254 if not isinstance(element, DimensionElement):
1255 try:
1256 element = self.dimensions[element]
1257 except KeyError as e:
1258 raise DimensionNameError(
1259 f"No such dimension '{element}', available dimensions: "
1260 + str(self.dimensions.getStaticElements())
1261 ) from e
1262 doomed_by: list[str] = []
1263 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1264 dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
1265 datasets, collections, components, doomed_by=doomed_by
1266 )
1267 if collection_wildcard is not None and collection_wildcard.empty():
1268 doomed_by.append("No dimension records can be found because collection list is empty.")
1269 summary = queries.QuerySummary(
1270 requested=element.graph,
1271 column_types=self._managers.column_types,
1272 data_id=data_id,
1273 expression=where,
1274 bind=bind,
1275 defaults=self.defaults.dataId,
1276 check=check,
1277 datasets=dataset_composition.keys(),
1278 )
1279 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
1280 for datasetType in dataset_composition.keys():
1281 builder.joinDataset(datasetType, collection_wildcard, isResult=False)
1282 query = builder.finish().with_record_columns(element)
1283 return queries.DatabaseDimensionRecordQueryResults(query, element)
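# Example (a hedged sketch): fetching dimension records directly. The element name, data
# ID values, and record attributes are hypothetical.
#
#     records = registry.queryDimensionRecords("detector", instrument="Cam")
#     for record in records:
#         print(record.id, record.full_name)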
1285 def queryDatasetAssociations(
1286 self,
1287 datasetType: Union[str, DatasetType],
1288 collections: CollectionArgType | None = Ellipsis,
1289 *,
1290 collectionTypes: Iterable[CollectionType] = CollectionType.all(),
1291 flattenChains: bool = False,
1292 ) -> Iterator[DatasetAssociation]:
1293 # Docstring inherited from lsst.daf.butler.registry.Registry
1294 if collections is None:
1295 if not self.defaults.collections:
1296 raise NoDefaultCollectionError(
1297 "No collections provided to queryDatasetAssociations, "
1298 "and no defaults from registry construction."
1299 )
1300 collections = self.defaults.collections
1301 collection_wildcard = CollectionWildcard.from_expression(collections)
1302 backend = queries.SqlQueryBackend(self._db, self._managers)
1303 parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False)
1304 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan")
1305 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
1306 for parent_collection_record in backend.resolve_collection_wildcard(
1307 collection_wildcard,
1308 collection_types=frozenset(collectionTypes),
1309 flatten_chains=flattenChains,
1310 ):
1311 # Resolve this possibly-chained collection into a list of
1312 # non-CHAINED collections that actually hold datasets of this
1313 # type.
1314 candidate_collection_records = backend.resolve_dataset_collections(
1315 parent_dataset_type,
1316 CollectionWildcard.from_names([parent_collection_record.name]),
1317 allow_calibration_collections=True,
1318 governor_constraints={},
1319 )
1320 if not candidate_collection_records:
1321 continue
1322 with backend.context() as context:
1323 relation = backend.make_dataset_query_relation(
1324 parent_dataset_type,
1325 candidate_collection_records,
1326 columns={"dataset_id", "run", "timespan", "collection"},
1327 context=context,
1328 )
1329 reader = queries.DatasetRefReader(
1330 parent_dataset_type,
1331 translate_collection=lambda k: self._managers.collections[k].name,
1332 full=False,
1333 )
1334 for row in context.fetch_iterable(relation):
1335 ref = reader.read(row)
1336 collection_record = self._managers.collections[row[collection_tag]]
1337 if collection_record.type is CollectionType.CALIBRATION:
1338 timespan = row[timespan_tag]
1339 else:
1340 # For backwards compatibility and (possibly?) user
1341 # convenience we continue to define the timespan of a
1342 # DatasetAssociation row for a non-CALIBRATION
1343 # collection to be None rather than a fully unbounded
1344 # timespan.
1345 timespan = None
1346 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan)
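# Example (a hedged sketch): listing which collections hold a hypothetical dataset type,
# including calibration validity ranges where applicable, using the generator above.
#
#     for assoc in registry.queryDatasetAssociations(
#         "flat", collections=..., collectionTypes={CollectionType.CALIBRATION}
#     ):
#         print(assoc.collection, assoc.ref.dataId, assoc.timespan)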
1348 @property
1349 def obsCoreTableManager(self) -> ObsCoreTableManager | None:
1350 # Docstring inherited from lsst.daf.butler.registry.Registry
1351 return self._managers.obscore
1353 storageClasses: StorageClassFactory
1354 """All storage classes known to the registry (`StorageClassFactory`).
1355 """