Coverage for python/lsst/daf/butler/registries/sql.py: 16%
515 statements
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("SqlRegistry",)

import contextlib
import logging
import warnings
from collections.abc import Iterable, Iterator, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Literal, cast

import sqlalchemy
from lsst.daf.relation import LeafRelation, Relation
from lsst.resources import ResourcePathExpression
from lsst.utils.introspection import find_outside_stacklevel
from lsst.utils.iteration import ensure_iterable

from ..core import (
    Config,
    DataCoordinate,
    DataId,
    DatasetAssociation,
    DatasetColumnTag,
    DatasetId,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionConfig,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    NamedKeyMapping,
    NameLookupMapping,
    Progress,
    StorageClassFactory,
    Timespan,
    ddl,
)
from ..core.utils import transactional
from ..registry import (
    ArgumentError,
    CollectionExpressionError,
    CollectionSummary,
    CollectionType,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    DimensionNameError,
    InconsistentDataIdError,
    NoDefaultCollectionError,
    OrphanedRecordError,
    RegistryConfig,
    RegistryConsistencyError,
    RegistryDefaults,
    _ButlerRegistry,
    queries,
)
from ..registry.interfaces import ChainedCollectionRecord, RunRecord
from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes
from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard

if TYPE_CHECKING:
    from .._butlerConfig import ButlerConfig
    from ..registry._registry import CollectionArgType
    from ..registry.interfaces import (
        CollectionRecord,
        Database,
        DatastoreRegistryBridgeManager,
        ObsCoreTableManager,
    )


_LOG = logging.getLogger(__name__)


class SqlRegistry(_ButlerRegistry):
    """Registry implementation based on SQLAlchemy.

    Parameters
    ----------
    database : `Database`
        Database instance to store Registry.
    defaults : `RegistryDefaults`
        Default collection search path and/or output `~CollectionType.RUN`
        collection.
    managers : `RegistryManagerInstances`
        All the managers required for this registry.
    """

    defaultConfigFile: str | None = None
    """Path to configuration defaults. Accessed within the ``configs`` resource
    or relative to a search path. Can be `None` if no defaults are specified.
    """

    @classmethod
    def createFromConfig(
        cls,
        config: RegistryConfig | str | None = None,
        dimensionConfig: DimensionConfig | str | None = None,
        butlerRoot: ResourcePathExpression | None = None,
    ) -> _ButlerRegistry:
        """Create registry database and return `SqlRegistry` instance.

        This method initializes the database contents; the database must be
        empty prior to calling this method.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration. If missing, the default configuration
            will be loaded from ``registry.yaml``.
        dimensionConfig : `DimensionConfig` or `str`, optional
            Dimensions configuration. If missing, the default configuration
            will be loaded from ``dimensions.yaml``.
        butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
            Path to the repository root this `SqlRegistry` will manage.

        Returns
        -------
        registry : `SqlRegistry`
            A new `SqlRegistry` instance.
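
        Examples
        --------
        A minimal sketch of creating a new repository's registry; the
        configuration path and repository root below are illustrative
        assumptions, not values defined in this module:

        >>> registry = SqlRegistry.createFromConfig(
        ...     "registry.yaml", butlerRoot="/tmp/butler-repo"
        ... )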
151 """
152 config = cls.forceRegistryConfig(config)
153 config.replaceRoot(butlerRoot)
155 if isinstance(dimensionConfig, str):
156 dimensionConfig = DimensionConfig(dimensionConfig)
157 elif dimensionConfig is None:
158 dimensionConfig = DimensionConfig()
159 elif not isinstance(dimensionConfig, DimensionConfig):
160 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}")
162 DatabaseClass = config.getDatabaseClass()
163 database = DatabaseClass.fromUri(
164 config.connectionString, origin=config.get("origin", 0), namespace=config.get("namespace")
165 )
166 managerTypes = RegistryManagerTypes.fromConfig(config)
167 managers = managerTypes.makeRepo(database, dimensionConfig)
168 return cls(database, RegistryDefaults(), managers)

    @classmethod
    def fromConfig(
        cls,
        config: ButlerConfig | RegistryConfig | Config | str,
        butlerRoot: ResourcePathExpression | None = None,
        writeable: bool = True,
        defaults: RegistryDefaults | None = None,
    ) -> _ButlerRegistry:
        """Create `Registry` subclass instance from ``config``.

        The registry database must be initialized prior to calling this method.

        Parameters
        ----------
        config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
            Registry configuration.
        butlerRoot : `lsst.resources.ResourcePathExpression`, optional
            Path to the repository root this `Registry` will manage.
        writeable : `bool`, optional
            If `True` (default) create a read-write connection to the database.
        defaults : `RegistryDefaults`, optional
            Default collection search path and/or output `~CollectionType.RUN`
            collection.

        Returns
        -------
        registry : `SqlRegistry` (subclass)
            A new `SqlRegistry` subclass instance.
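
        Examples
        --------
        A minimal sketch of connecting to an existing repository; the butler
        configuration path is an illustrative assumption:

        >>> registry = SqlRegistry.fromConfig("/repo/butler.yaml", writeable=False)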
198 """
199 config = cls.forceRegistryConfig(config)
200 config.replaceRoot(butlerRoot)
201 DatabaseClass = config.getDatabaseClass()
202 database = DatabaseClass.fromUri(
203 config.connectionString,
204 origin=config.get("origin", 0),
205 namespace=config.get("namespace"),
206 writeable=writeable,
207 )
208 managerTypes = RegistryManagerTypes.fromConfig(config)
209 with database.session():
210 managers = managerTypes.loadRepo(database)
211 if defaults is None:
212 defaults = RegistryDefaults()
213 return cls(database, defaults, managers)

    def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances):
        self._db = database
        self._managers = managers
        self.storageClasses = StorageClassFactory()
        # Intentionally invoke property setter to initialize defaults. This
        # can only be done after most of the rest of Registry has already been
        # initialized, and must be done before the property getter is used.
        self.defaults = defaults

    def __str__(self) -> str:
        return str(self._db)

    def __repr__(self) -> str:
        return f"SqlRegistry({self._db!r}, {self.dimensions!r})"

    def isWriteable(self) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._db.isWriteable()

    def copy(self, defaults: RegistryDefaults | None = None) -> _ButlerRegistry:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if defaults is None:
            # No need to copy, because `RegistryDefaults` is immutable; we
            # effectively copy on write.
            defaults = self.defaults
        return type(self)(self._db, defaults, self._managers)

    @property
    def dimensions(self) -> DimensionUniverse:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.dimensions.universe

    def refresh(self) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        with self._db.transaction():
            self._managers.refresh()

    @contextlib.contextmanager
    def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        try:
            with self._db.transaction(savepoint=savepoint):
                yield
        except BaseException:
            # TODO: this clears the caches sometimes when we wouldn't actually
            # need to. Can we avoid that?
            self._managers.dimensions.clearCaches()
            raise

    def resetConnectionPool(self) -> None:
        """Reset SQLAlchemy connection pool for `SqlRegistry` database.

        This operation is useful when using the registry with fork-based
        multiprocessing. To use the registry across a fork boundary one has to
        make sure that there are no currently active connections (no session
        or transaction is in progress) and that the connection pool is reset
        with this method, which should be called by the child process
        immediately after the fork.
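
        Examples
        --------
        A minimal sketch of the intended call pattern with fork-based
        multiprocessing; the worker function and process setup are
        illustrative assumptions:

        >>> import multiprocessing
        >>> def worker(registry):
        ...     registry.resetConnectionPool()  # first thing after the fork
        ...     ...  # use the registry normally from here on
        >>> ctx = multiprocessing.get_context("fork")
        >>> # ctx.Process(target=worker, args=(registry,)).start()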
273 """
274 self._db._engine.dispose()

    def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
        """Add an opaque (to the `Registry`) table for use by a `Datastore` or
        other data repository client.

        Opaque table records can be added via `insertOpaqueData`, retrieved via
        `fetchOpaqueData`, and removed via `deleteOpaqueData`.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. This may differ from the
            actual name used in the database by a prefix and/or suffix.
        spec : `ddl.TableSpec`
            Specification for the table to be added.
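
        Examples
        --------
        A minimal sketch of the opaque-table round trip; the table name and
        field definitions are illustrative assumptions, not tables used by
        any real datastore:

        >>> import sqlalchemy
        >>> spec = ddl.TableSpec(
        ...     fields=[
        ...         ddl.FieldSpec(name="dataset_id", dtype=sqlalchemy.String, length=64, primaryKey=True),
        ...         ddl.FieldSpec(name="path", dtype=sqlalchemy.String, length=256),
        ...     ]
        ... )
        >>> registry.registerOpaqueTable("my_datastore_records", spec)
        >>> registry.insertOpaqueData("my_datastore_records", {"dataset_id": "abc", "path": "a/b.fits"})
        >>> rows = list(registry.fetchOpaqueData("my_datastore_records", dataset_id="abc"))
        >>> registry.deleteOpaqueData("my_datastore_records", dataset_id="abc")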
290 """
291 self._managers.opaque.register(tableName, spec)
293 @transactional
294 def insertOpaqueData(self, tableName: str, *data: dict) -> None:
295 """Insert records into an opaque table.
297 Parameters
298 ----------
299 tableName : `str`
300 Logical name of the opaque table. Must match the name used in a
301 previous call to `registerOpaqueTable`.
302 data
303 Each additional positional argument is a dictionary that represents
304 a single row to be added.
305 """
306 self._managers.opaque[tableName].insert(*data)
308 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]:
309 """Retrieve records from an opaque table.
311 Parameters
312 ----------
313 tableName : `str`
314 Logical name of the opaque table. Must match the name used in a
315 previous call to `registerOpaqueTable`.
316 where
317 Additional keyword arguments are interpreted as equality
318 constraints that restrict the returned rows (combined with AND);
319 keyword arguments are column names and values are the values they
320 must have.
322 Yields
323 ------
324 row : `dict`
325 A dictionary representing a single result row.
326 """
327 yield from self._managers.opaque[tableName].fetch(**where)
329 @transactional
330 def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
331 """Remove records from an opaque table.
333 Parameters
334 ----------
335 tableName : `str`
336 Logical name of the opaque table. Must match the name used in a
337 previous call to `registerOpaqueTable`.
338 where
339 Additional keyword arguments are interpreted as equality
340 constraints that restrict the deleted rows (combined with AND);
341 keyword arguments are column names and values are the values they
342 must have.
343 """
344 self._managers.opaque[tableName].delete(where.keys(), where)

    def registerCollection(
        self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None
    ) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, registered = self._managers.collections.register(name, type, doc=doc)
        return registered

    def getCollectionType(self, name: str) -> CollectionType:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.find(name).type

    def _get_collection_record(self, name: str) -> CollectionRecord:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.find(name)

    def registerRun(self, name: str, doc: str | None = None) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc)
        return registered

    @transactional
    def removeCollection(self, name: str) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.collections.remove(name)

    def getCollectionChain(self, parent: str) -> tuple[str, ...]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(parent)
        if record.type is not CollectionType.CHAINED:
            raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
        assert isinstance(record, ChainedCollectionRecord)
        return record.children

    @transactional
    def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(parent)
        if record.type is not CollectionType.CHAINED:
            raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
        assert isinstance(record, ChainedCollectionRecord)
        children = CollectionWildcard.from_expression(children).require_ordered()
        if children != record.children or flatten:
            record.update(self._managers.collections, children, flatten=flatten)

    def getCollectionParentChains(self, collection: str) -> set[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return {
            record.name
            for record in self._managers.collections.getParentChains(
                self._managers.collections.find(collection).key
            )
        }

    def getCollectionDocumentation(self, collection: str) -> str | None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key)

    def setCollectionDocumentation(self, collection: str, doc: str | None) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc)

    def getCollectionSummary(self, collection: str) -> CollectionSummary:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(collection)
        return self._managers.datasets.getCollectionSummary(record)

    def registerDatasetType(self, datasetType: DatasetType) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, inserted = self._managers.datasets.register(datasetType)
        return inserted

    def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry

        for datasetTypeExpression in ensure_iterable(name):
            # Catch any warnings from the caller specifying a component
            # dataset type. This will result in an error later but the
            # warning could be confusing when the caller is not querying
            # anything.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=FutureWarning)
                datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression))
            if not datasetTypes:
                _LOG.info("Dataset type %r not defined", datasetTypeExpression)
            else:
                for datasetType in datasetTypes:
                    self._managers.datasets.remove(datasetType.name)
                    _LOG.info("Removed dataset type %r", datasetType.name)

    def getDatasetType(self, name: str) -> DatasetType:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        parent_name, component = DatasetType.splitDatasetTypeName(name)
        storage = self._managers.datasets[parent_name]
        if component is None:
            return storage.datasetType
        else:
            return storage.datasetType.makeComponentDatasetType(component)

    def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datasets.supportsIdGenerationMode(mode)

    def findDataset(
        self,
        datasetType: DatasetType | str,
        dataId: DataId | None = None,
        *,
        collections: CollectionArgType | None = None,
        timespan: Timespan | None = None,
        **kwargs: Any,
    ) -> DatasetRef | None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if collections is None:
            if not self.defaults.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to findDataset, and no defaults from registry construction."
                )
            collections = self.defaults.collections
        backend = queries.SqlQueryBackend(self._db, self._managers)
        collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True)
        if collection_wildcard.empty():
            return None
        matched_collections = backend.resolve_collection_wildcard(collection_wildcard)
        parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard(
            datasetType, components_deprecated=False
        )
        if len(components) > 1:
            raise DatasetTypeError(
                f"findDataset requires exactly one dataset type; got multiple components {components} "
                f"for parent dataset type {parent_dataset_type.name}."
            )
        component = components[0]
        dataId = DataCoordinate.standardize(
            dataId,
            graph=parent_dataset_type.dimensions,
            universe=self.dimensions,
            defaults=self.defaults.dataId,
            **kwargs,
        )
        governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names}
        (filtered_collections,) = backend.filter_dataset_collections(
            [parent_dataset_type],
            matched_collections,
            governor_constraints=governor_constraints,
        ).values()
        if not filtered_collections:
            return None
        if timespan is None:
            filtered_collections = [
                collection_record
                for collection_record in filtered_collections
                if collection_record.type is not CollectionType.CALIBRATION
            ]
        if filtered_collections:
            requested_columns = {"dataset_id", "run", "collection"}
            with backend.context() as context:
                predicate = context.make_data_coordinate_predicate(
                    dataId.subset(parent_dataset_type.dimensions), full=False
                )
                if timespan is not None:
                    requested_columns.add("timespan")
                    predicate = predicate.logical_and(
                        context.make_timespan_overlap_predicate(
                            DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan
                        )
                    )
                relation = backend.make_dataset_query_relation(
                    parent_dataset_type, filtered_collections, requested_columns, context
                ).with_rows_satisfying(predicate)
                rows = list(context.fetch_iterable(relation))
        else:
            rows = []
        if not rows:
            return None
        elif len(rows) == 1:
            best_row = rows[0]
        else:
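            # More than one row: the dataset was found in several of the
            # matched collections.  Rank rows by the position of their
            # collection in the ordered search path and keep the best
            # (lowest) rank; a tie at the best rank should only be possible
            # for CALIBRATION collections and is reported as an error below.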
            rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)}
            collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
            row_iter = iter(rows)
            best_row = next(row_iter)
            best_rank = rank_by_collection_key[best_row[collection_tag]]
            have_tie = False
            for row in row_iter:
                if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank:
                    best_row = row
                    best_rank = rank
                    have_tie = False
                elif rank == best_rank:
                    have_tie = True
                    assert timespan is not None, "Rank ties should be impossible given DB constraints."
            if have_tie:
                raise LookupError(
                    f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections "
                    f"{collection_wildcard.strings} with timespan {timespan}."
                )
        reader = queries.DatasetRefReader(
            parent_dataset_type,
            translate_collection=lambda k: self._managers.collections[k].name,
        )
        ref = reader.read(best_row, data_id=dataId)
        if component is not None:
            ref = ref.makeComponentRef(component)
        return ref

    @transactional
    def insertDatasets(
        self,
        datasetType: DatasetType | str,
        dataIds: Iterable[DataId],
        run: str | None = None,
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> list[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
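        # Illustrative example (dataset type, data ID, and run name below are
        # assumptions, not anything defined in this module):
        #
        #     registry.insertDatasets(
        #         "raw",
        #         [{"instrument": "HSC", "exposure": 903334, "detector": 10}],
        #         run="HSC/raw/all",
        #     )
        #
        # registers one dataset of the already-registered "raw" dataset type
        # in the given RUN collection and returns its resolved DatasetRef.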
        if isinstance(datasetType, DatasetType):
            storage = self._managers.datasets.find(datasetType.name)
            if storage is None:
                raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
        else:
            storage = self._managers.datasets.find(datasetType)
            if storage is None:
                raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.")
        if run is None:
            if self.defaults.run is None:
                raise NoDefaultCollectionError(
                    "No run provided to insertDatasets, and no default from registry construction."
                )
            run = self.defaults.run
        runRecord = self._managers.collections.find(run)
        if runRecord.type is not CollectionType.RUN:
            raise CollectionTypeError(
                f"Given collection is of type {runRecord.type.name}; RUN collection required."
            )
        assert isinstance(runRecord, RunRecord)
        progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
        if expand:
            expandedDataIds = [
                self.expandDataId(dataId, graph=storage.datasetType.dimensions)
                for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs")
            ]
        else:
            expandedDataIds = [
                DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds
            ]
        try:
            refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode))
            if self._managers.obscore:
                context = queries.SqlQueryContext(self._db, self._managers.column_types)
                self._managers.obscore.add_datasets(refs, context)
        except sqlalchemy.exc.IntegrityError as err:
            raise ConflictingDefinitionError(
                "A database constraint failure was triggered by inserting "
                f"one or more datasets of type {storage.datasetType} into "
                f"collection '{run}'. "
                "This probably means a dataset with the same data ID "
                "and dataset type already exists, but it may also mean a "
                "dimension row is missing."
            ) from err
        return refs

    @transactional
    def _importDatasets(
        self,
        datasets: Iterable[DatasetRef],
        expand: bool = True,
    ) -> list[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        datasets = list(datasets)
        if not datasets:
            # nothing to do
            return []

        # find dataset type
        datasetTypes = {dataset.datasetType for dataset in datasets}
        if len(datasetTypes) != 1:
            raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}")
        datasetType = datasetTypes.pop()

        # get storage handler for this dataset type
        storage = self._managers.datasets.find(datasetType.name)
        if storage is None:
            raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")

        # find run name
        runs = {dataset.run for dataset in datasets}
        if len(runs) != 1:
            raise ValueError(f"Multiple run names in input datasets: {runs}")
        run = runs.pop()

        runRecord = self._managers.collections.find(run)
        if runRecord.type is not CollectionType.RUN:
            raise CollectionTypeError(
                f"Given collection '{runRecord.name}' is of type {runRecord.type.name};"
                " RUN collection required."
            )
        assert isinstance(runRecord, RunRecord)

        progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
        if expand:
            expandedDatasets = [
                dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions))
                for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs")
            ]
        else:
            expandedDatasets = [
                DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True)
                for dataset in datasets
            ]

        try:
            refs = list(storage.import_(runRecord, expandedDatasets))
            if self._managers.obscore:
                context = queries.SqlQueryContext(self._db, self._managers.column_types)
                self._managers.obscore.add_datasets(refs, context)
        except sqlalchemy.exc.IntegrityError as err:
            raise ConflictingDefinitionError(
                "A database constraint failure was triggered by inserting "
                f"one or more datasets of type {storage.datasetType} into "
                f"collection '{run}'. "
                "This probably means a dataset with the same data ID "
                "and dataset type already exists, but it may also mean a "
                "dimension row is missing."
            ) from err
        # Check that imported dataset IDs match the input
        for imported_ref, input_ref in zip(refs, datasets, strict=True):
            if imported_ref.id != input_ref.id:
                raise RegistryConsistencyError(
                    "Imported dataset ID differs from input dataset ID, "
                    f"input ref: {input_ref}, imported ref: {imported_ref}"
                )
        return refs

    def getDataset(self, id: DatasetId) -> DatasetRef | None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datasets.getDatasetRef(id)

    @transactional
    def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG)
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.iter_by_type(refs), desc="Removing datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            try:
                storage.delete(refsForType)
            except sqlalchemy.exc.IntegrityError as err:
                raise OrphanedRecordError(
                    "One or more datasets is still present in one or more Datastores."
                ) from err

    @transactional
    def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        if collectionRecord.type is not CollectionType.TAGGED:
            raise CollectionTypeError(
                f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED."
            )
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.iter_by_type(refs), desc="Associating datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            try:
                storage.associate(collectionRecord, refsForType)
                if self._managers.obscore:
                    # If a TAGGED collection is being monitored by ObsCore
                    # manager then we may need to save the dataset.
                    context = queries.SqlQueryContext(self._db, self._managers.column_types)
                    self._managers.obscore.associate(refsForType, collectionRecord, context)
            except sqlalchemy.exc.IntegrityError as err:
                raise ConflictingDefinitionError(
                    f"Constraint violation while associating dataset of type {datasetType.name} with "
                    f"collection {collection}. This probably means that one or more datasets with the same "
                    "dataset type and data ID already exist in the collection, but it may also indicate "
                    "that the datasets do not exist."
                ) from err

    @transactional
    def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        if collectionRecord.type is not CollectionType.TAGGED:
            raise CollectionTypeError(
                f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED."
            )
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.iter_by_type(refs), desc="Disassociating datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            storage.disassociate(collectionRecord, refsForType)
            if self._managers.obscore:
                self._managers.obscore.disassociate(refsForType, collectionRecord)

    @transactional
    def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.iter_by_type(refs), desc="Certifying datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            storage.certify(
                collectionRecord,
                refsForType,
                timespan,
                context=queries.SqlQueryContext(self._db, self._managers.column_types),
            )

    @transactional
    def decertify(
        self,
        collection: str,
        datasetType: str | DatasetType,
        timespan: Timespan,
        *,
        dataIds: Iterable[DataId] | None = None,
    ) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        collectionRecord = self._managers.collections.find(collection)
        if isinstance(datasetType, str):
            storage = self._managers.datasets[datasetType]
        else:
            storage = self._managers.datasets[datasetType.name]
        standardizedDataIds = None
        if dataIds is not None:
            standardizedDataIds = [
                DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds
            ]
        storage.decertify(
            collectionRecord,
            timespan,
            dataIds=standardizedDataIds,
            context=queries.SqlQueryContext(self._db, self._managers.column_types),
        )

    def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
        """Return an object that allows a new `Datastore` instance to
        communicate with this `Registry`.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            Object that mediates communication between this `Registry` and its
            associated datastores.
        """
        return self._managers.datastores

    def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datastores.findDatastores(ref)

    def expandDataId(
        self,
        dataId: DataId | None = None,
        *,
        graph: DimensionGraph | None = None,
        records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None,
        withDefaults: bool = True,
        **kwargs: Any,
    ) -> DataCoordinate:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if not withDefaults:
            defaults = None
        else:
            defaults = self.defaults.dataId
        try:
            standardized = DataCoordinate.standardize(
                dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs
            )
        except KeyError as exc:
            # This means either kwargs have some odd name or a required
            # dimension is missing.
            raise DimensionNameError(str(exc)) from exc
        if standardized.hasRecords():
            return standardized
        if records is None:
            records = {}
        elif isinstance(records, NamedKeyMapping):
            records = records.byName()
        else:
            records = dict(records)
        if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
            records.update(dataId.records.byName())
        keys = standardized.byName()
        context = queries.SqlQueryContext(self._db, self._managers.column_types)
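        # Walk the dimension elements in an order that guarantees the key
        # values needed by each element are available before it is visited,
        # fetching the matching dimension record for each element and
        # propagating the values of implied dimensions into ``keys``;
        # disagreements between provided and implied values are errors.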
        for element in standardized.graph.primaryKeyTraversalOrder:
            record = records.get(element.name, ...)  # Use ... to mean not found; None might mean NULL
            if record is ...:
                if isinstance(element, Dimension) and keys.get(element.name) is None:
                    if element in standardized.graph.required:
                        raise DimensionNameError(
                            f"No value or null value for required dimension {element.name}."
                        )
                    keys[element.name] = None
                    record = None
                else:
                    storage = self._managers.dimensions[element]
                    record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context)
                records[element.name] = record
            if record is not None:
                for d in element.implied:
                    value = getattr(record, d.name)
                    if keys.setdefault(d.name, value) != value:
                        raise InconsistentDataIdError(
                            f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
                            f"but {element.name} implies {d.name}={value!r}."
                        )
            else:
                if element in standardized.graph.required:
                    raise DataIdValueError(
                        f"Could not fetch record for required dimension {element.name} via keys {keys}."
                    )
                if element.alwaysJoin:
                    raise InconsistentDataIdError(
                        f"Could not fetch record for element {element.name} via keys {keys}, "
                        "but it is marked alwaysJoin=True; this means one or more dimensions are not "
                        "related."
                    )
                for d in element.implied:
                    keys.setdefault(d.name, None)
                    records.setdefault(d.name, None)
        return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records)

    def insertDimensionData(
        self,
        element: DimensionElement | str,
        *data: Mapping[str, Any] | DimensionRecord,
        conform: bool = True,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if conform:
            if isinstance(element, str):
                element = self.dimensions[element]
            records = [
                row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data
            ]
        else:
            # Ignore typing since caller said to trust them with conform=False.
            records = data  # type: ignore
        storage = self._managers.dimensions[element]
        storage.insert(*records, replace=replace, skip_existing=skip_existing)

    def syncDimensionData(
        self,
        element: DimensionElement | str,
        row: Mapping[str, Any] | DimensionRecord,
        conform: bool = True,
        update: bool = False,
    ) -> bool | dict[str, Any]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if conform:
            if isinstance(element, str):
                element = self.dimensions[element]
            record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row)
        else:
            # Ignore typing since caller said to trust them with conform=False.
            record = row  # type: ignore
        storage = self._managers.dimensions[element]
        return storage.sync(record, update=update)

    def queryDatasetTypes(
        self,
        expression: Any = ...,
        *,
        components: bool | None = False,
        missing: list[str] | None = None,
    ) -> Iterable[DatasetType]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        wildcard = DatasetTypeWildcard.from_expression(expression)
        composition_dict = self._managers.datasets.resolve_wildcard(
            wildcard,
            components=components,
            missing=missing,
        )
        result: list[DatasetType] = []
        for parent_dataset_type, components_for_parent in composition_dict.items():
            result.extend(
                parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type
                for c in components_for_parent
            )
        return result

    def queryCollections(
        self,
        expression: Any = ...,
        datasetType: DatasetType | None = None,
        collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(),
        flattenChains: bool = False,
        includeChains: bool | None = None,
    ) -> Sequence[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry

        # Right now the datasetType argument is completely ignored, but that
        # is consistent with its [lack of] guarantees. DM-24939 or a follow-up
        # ticket will take care of that.
        try:
            wildcard = CollectionWildcard.from_expression(expression)
        except TypeError as exc:
            raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc
        collectionTypes = ensure_iterable(collectionTypes)
        return [
            record.name
            for record in self._managers.collections.resolve_wildcard(
                wildcard,
                collection_types=frozenset(collectionTypes),
                flatten_chains=flattenChains,
                include_chains=includeChains,
            )
        ]

    def _makeQueryBuilder(
        self,
        summary: queries.QuerySummary,
        doomed_by: Iterable[str] = (),
    ) -> queries.QueryBuilder:
        """Return a `QueryBuilder` instance capable of constructing and
        managing more complex queries than those obtainable via `Registry`
        interfaces.

        This is an advanced interface; downstream code should prefer
        `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
        are sufficient.

        Parameters
        ----------
        summary : `queries.QuerySummary`
            Object describing and categorizing the full set of dimensions that
            will be included in the query.
        doomed_by : `~collections.abc.Iterable` of `str`, optional
            A list of diagnostic messages that indicate why the query is going
            to yield no results and should not even be executed. If an empty
            container (default) the query will be executed unless other code
            determines that it is doomed.

        Returns
        -------
        builder : `queries.QueryBuilder`
            Object that can be used to construct and perform advanced queries.
        """
        doomed_by = list(doomed_by)
        backend = queries.SqlQueryBackend(self._db, self._managers)
        context = backend.context()
        relation: Relation | None = None
        if doomed_by:
            relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by)
        return queries.QueryBuilder(
            summary,
            backend=backend,
            context=context,
            relation=relation,
        )

    def _standardize_query_data_id_args(
        self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any
    ) -> DataCoordinate:
        """Preprocess the data ID arguments passed to query* methods.

        Parameters
        ----------
        data_id : `DataId` or `None`
            Data ID that constrains the query results.
        doomed_by : `list` [ `str` ]
            List to append messages indicating why the query is doomed to
            yield no results.
        **kwargs
            Additional data ID key-value pairs, extending and overriding
            ``data_id``.

        Returns
        -------
        data_id : `DataCoordinate`
            Standardized data ID. Will be fully expanded unless expansion
            fails, in which case a message will be appended to ``doomed_by``
            on return.
        """
        try:
            return self.expandDataId(data_id, **kwargs)
        except DataIdValueError as err:
            doomed_by.append(str(err))
            return DataCoordinate.standardize(
                data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId
            )

    def _standardize_query_dataset_args(
        self,
        datasets: Any,
        collections: CollectionArgType | None,
        components: bool | None,
        mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain",
        *,
        doomed_by: list[str],
    ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]:
        """Preprocess dataset arguments passed to query* methods.

        Parameters
        ----------
        datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these
            Expression identifying dataset types. See `queryDatasetTypes` for
            details.
        collections : `str`, `re.Pattern`, or iterable of these
            Expression identifying collections to be searched. See
            `queryCollections` for details.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components.
            If `None` (default), apply patterns to components only if their
            parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

            Values other than `False` are deprecated, and only `False` will be
            supported after v26. After v27 this argument will be removed
            entirely.
        mode : `str`, optional
            The way in which datasets are being used in this query; one of:

            - "find_first": this is a query for the first dataset in an
              ordered list of collections. Prohibits collection wildcards,
              but permits dataset type wildcards.

            - "find_all": this is a query for all datasets in all matched
              collections. Permits collection and dataset type wildcards.

            - "constrain": this is a query for something other than datasets,
              with results constrained by dataset existence. Permits
              collection wildcards and prohibits ``...`` as a dataset type
              wildcard.
        doomed_by : `list` [ `str` ]
            List to append messages indicating why the query is doomed to
            yield no results.

        Returns
        -------
        composition : `defaultdict` [ `DatasetType`, `list` [ `str` ] ]
            Dictionary mapping parent dataset type to `list` of components
            matched for that dataset type (or `None` for the parent itself).
        collections : `CollectionWildcard`
            Processed collection expression.
        """
        composition: dict[DatasetType, list[str | None]] = {}
        collection_wildcard: CollectionWildcard | None = None
        if datasets is not None:
            if collections is None:
                if not self.defaults.collections:
                    raise NoDefaultCollectionError("No collections, and no registry default collections.")
                collection_wildcard = CollectionWildcard.from_expression(self.defaults.collections)
            else:
                collection_wildcard = CollectionWildcard.from_expression(collections)
                if mode == "find_first" and collection_wildcard.patterns:
                    raise TypeError(
                        f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context."
                    )
            missing: list[str] = []
            composition = self._managers.datasets.resolve_wildcard(
                datasets, components=components, missing=missing, explicit_only=(mode == "constrain")
            )
            if missing and mode == "constrain":
                # After v26 this should raise MissingDatasetTypeError, to be
                # implemented on DM-36303.
                warnings.warn(
                    f"Dataset type(s) {missing} are not registered; this will be an error after v26.",
                    FutureWarning,
                    stacklevel=find_outside_stacklevel("lsst.daf.butler"),
                )
            doomed_by.extend(f"Dataset type {name} is not registered." for name in missing)
        elif collections:
            # I think this check should actually be `collections is not None`,
            # but it looks like some CLI scripts use empty tuple as default.
            raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.")
        return composition, collection_wildcard

    def queryDatasets(
        self,
        datasetType: Any,
        *,
        collections: CollectionArgType | None = None,
        dimensions: Iterable[Dimension | str] | None = None,
        dataId: DataId | None = None,
        where: str = "",
        findFirst: bool = False,
        components: bool | None = False,
        bind: Mapping[str, Any] | None = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DatasetQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
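        # Illustrative example (dataset type, collection, and where clause are
        # assumptions, not anything defined in this module):
        #
        #     refs = registry.queryDatasets(
        #         "calexp", collections=["HSC/runs/RC2"], where="visit = 903334"
        #     )
        #
        # returns a lazy results object over all matching datasets; with
        # findFirst=True only the first match for each data ID in the ordered
        # collection search path is kept.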
        doomed_by: list[str] = []
        data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
        dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
            datasetType,
            collections,
            components,
            mode="find_first" if findFirst else "find_all",
            doomed_by=doomed_by,
        )
        if collection_wildcard is not None and collection_wildcard.empty():
            doomed_by.append("No datasets can be found because collection list is empty.")
            return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
        parent_results: list[queries.ParentDatasetQueryResults] = []
        for parent_dataset_type, components_for_parent in dataset_composition.items():
            # The full set of dimensions in the query is the combination of
            # those needed for the DatasetType and those explicitly requested,
            # if any.
            dimension_names = set(parent_dataset_type.dimensions.names)
            if dimensions is not None:
                dimension_names.update(self.dimensions.extract(dimensions).names)
            # Construct the summary structure needed to construct a
            # QueryBuilder.
            summary = queries.QuerySummary(
                requested=DimensionGraph(self.dimensions, names=dimension_names),
                column_types=self._managers.column_types,
                data_id=data_id,
                expression=where,
                bind=bind,
                defaults=self.defaults.dataId,
                check=check,
                datasets=[parent_dataset_type],
            )
            builder = self._makeQueryBuilder(summary)
            # Add the dataset subquery to the query, telling the QueryBuilder
            # to include the rank of the selected collection in the results
            # only if we need to findFirst. Note that if any of the
            # collections are actually wildcard expressions, and
            # findFirst=True, this will raise TypeError for us.
            builder.joinDataset(parent_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst)
            query = builder.finish()
            parent_results.append(
                queries.ParentDatasetQueryResults(
                    query, parent_dataset_type, components=components_for_parent
                )
            )
        if not parent_results:
            doomed_by.extend(
                f"No registered dataset type matching {t!r} found, so no matching datasets can "
                "exist in any collection."
                for t in ensure_iterable(datasetType)
            )
            return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
        elif len(parent_results) == 1:
            return parent_results[0]
        else:
            return queries.ChainedDatasetQueryResults(parent_results)

    def queryDataIds(
        self,
        dimensions: Iterable[Dimension | str] | Dimension | str,
        *,
        dataId: DataId | None = None,
        datasets: Any = None,
        collections: CollectionArgType | None = None,
        where: str = "",
        components: bool | None = None,
        bind: Mapping[str, Any] | None = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DataCoordinateQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
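        # Illustrative example (dimension, dataset type, and collection names
        # are assumptions, not anything defined in this module):
        #
        #     data_ids = registry.queryDataIds(
        #         ["visit", "detector"], datasets="raw", collections="HSC/raw/all"
        #     )
        #
        # yields data coordinates for the requested dimensions, constrained to
        # those for which matching "raw" datasets exist in the collection.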
        dimensions = ensure_iterable(dimensions)
        requestedDimensions = self.dimensions.extract(dimensions)
        doomed_by: list[str] = []
        data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
        dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
            datasets, collections, components, doomed_by=doomed_by
        )
        if collection_wildcard is not None and collection_wildcard.empty():
            doomed_by.append("No data coordinates can be found because collection list is empty.")
        summary = queries.QuerySummary(
            requested=requestedDimensions,
            column_types=self._managers.column_types,
            data_id=data_id,
            expression=where,
            bind=bind,
            defaults=self.defaults.dataId,
            check=check,
            datasets=dataset_composition.keys(),
        )
        builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
        for datasetType in dataset_composition:
            builder.joinDataset(datasetType, collection_wildcard, isResult=False)
        query = builder.finish()

        return queries.DataCoordinateQueryResults(query)

    def queryDimensionRecords(
        self,
        element: DimensionElement | str,
        *,
        dataId: DataId | None = None,
        datasets: Any = None,
        collections: CollectionArgType | None = None,
        where: str = "",
        components: bool | None = None,
        bind: Mapping[str, Any] | None = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DimensionRecordQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
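        # Illustrative example (element name and where clause are assumptions,
        # not anything defined in this module):
        #
        #     records = registry.queryDimensionRecords(
        #         "exposure", where="exposure.observation_type = 'science'"
        #     )
        #
        # yields one DimensionRecord for each matching row of the "exposure"
        # dimension element table.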
        if not isinstance(element, DimensionElement):
            try:
                element = self.dimensions[element]
            except KeyError as e:
                raise DimensionNameError(
                    f"No such dimension '{element}', available dimensions: "
                    + str(self.dimensions.getStaticElements())
                ) from e
        doomed_by: list[str] = []
        data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
        dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
            datasets, collections, components, doomed_by=doomed_by
        )
        if collection_wildcard is not None and collection_wildcard.empty():
            doomed_by.append("No dimension records can be found because collection list is empty.")
        summary = queries.QuerySummary(
            requested=element.graph,
            column_types=self._managers.column_types,
            data_id=data_id,
            expression=where,
            bind=bind,
            defaults=self.defaults.dataId,
            check=check,
            datasets=dataset_composition.keys(),
        )
        builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
        for datasetType in dataset_composition:
            builder.joinDataset(datasetType, collection_wildcard, isResult=False)
        query = builder.finish().with_record_columns(element)
        return queries.DatabaseDimensionRecordQueryResults(query, element)

    def queryDatasetAssociations(
        self,
        datasetType: str | DatasetType,
        collections: CollectionArgType | None = ...,
        *,
        collectionTypes: Iterable[CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
    ) -> Iterator[DatasetAssociation]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
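        # Illustrative example (dataset type and collection names are
        # assumptions, not anything defined in this module):
        #
        #     for assoc in registry.queryDatasetAssociations(
        #         "bias", collections="HSC/calib", flattenChains=True
        #     ):
        #         print(assoc.collection, assoc.timespan)
        #
        # iterates over collection membership of the matched datasets;
        # timespans are only set for CALIBRATION collections.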
        if collections is None:
            if not self.defaults.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to queryDatasetAssociations, "
                    "and no defaults from registry construction."
                )
            collections = self.defaults.collections
        collection_wildcard = CollectionWildcard.from_expression(collections)
        backend = queries.SqlQueryBackend(self._db, self._managers)
        parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False)
        timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan")
        collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
        for parent_collection_record in backend.resolve_collection_wildcard(
            collection_wildcard,
            collection_types=frozenset(collectionTypes),
            flatten_chains=flattenChains,
        ):
            # Resolve this possibly-chained collection into a list of
            # non-CHAINED collections that actually hold datasets of this
            # type.
            candidate_collection_records = backend.resolve_dataset_collections(
                parent_dataset_type,
                CollectionWildcard.from_names([parent_collection_record.name]),
                allow_calibration_collections=True,
                governor_constraints={},
            )
            if not candidate_collection_records:
                continue
            with backend.context() as context:
                relation = backend.make_dataset_query_relation(
                    parent_dataset_type,
                    candidate_collection_records,
                    columns={"dataset_id", "run", "timespan", "collection"},
                    context=context,
                )
                reader = queries.DatasetRefReader(
                    parent_dataset_type,
                    translate_collection=lambda k: self._managers.collections[k].name,
                    full=False,
                )
                for row in context.fetch_iterable(relation):
                    ref = reader.read(row)
                    collection_record = self._managers.collections[row[collection_tag]]
                    if collection_record.type is CollectionType.CALIBRATION:
                        timespan = row[timespan_tag]
                    else:
                        # For backwards compatibility and (possibly?) user
                        # convenience we continue to define the timespan of a
                        # DatasetAssociation row for a non-CALIBRATION
                        # collection to be None rather than a fully unbounded
                        # timespan.
                        timespan = None
                    yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan)

    @property
    def obsCoreTableManager(self) -> ObsCoreTableManager | None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.obscore

    storageClasses: StorageClassFactory
    """All storage classes known to the registry (`StorageClassFactory`).
    """