Coverage for python/lsst/daf/butler/registry/queries/_query_backend.py: 31%
91 statements
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("QueryBackend",)

from abc import abstractmethod
from collections.abc import Iterable, Mapping, Sequence, Set
from typing import TYPE_CHECKING, Any, Generic, TypeVar

from lsst.daf.relation import (
    BinaryOperationRelation,
    ColumnTag,
    LeafRelation,
    MarkerRelation,
    Relation,
    UnaryOperationRelation,
)

from ...core import (
    DataCoordinate,
    DatasetColumnTag,
    DatasetType,
    DimensionGraph,
    DimensionKeyColumnTag,
    DimensionRecord,
    DimensionUniverse,
)
from .._collectionType import CollectionType
from .._exceptions import DatasetTypeError, MissingDatasetTypeError
from ..wildcards import CollectionWildcard
from ._query_context import QueryContext
from .find_first_dataset import FindFirstDataset

if TYPE_CHECKING:
    from ..interfaces import CollectionRecord


_C = TypeVar("_C", bound=QueryContext)


class QueryBackend(Generic[_C]):
    """An interface for constructing and evaluating the
    `~lsst.daf.relation.Relation` objects that comprise registry queries.

    This ABC is expected to have a concrete subclass for each concrete
    registry type, and most subclasses will be paired with a `QueryContext`
    subclass.  See `QueryContext` for the division of responsibilities between
    these two interfaces.
    """

    @property
    @abstractmethod
    def universe(self) -> DimensionUniverse:
        """Definition of all dimensions and dimension elements for this
        registry (`DimensionUniverse`).
        """
        raise NotImplementedError()

    def context(self) -> _C:
        """Return a context manager that can be used to execute queries with
        this backend.

        Returns
        -------
        context : `QueryContext`
            Context manager that manages state and connections needed to
            execute queries.
        """
        raise NotImplementedError()

    @abstractmethod
    def get_collection_name(self, key: Any) -> str:
        """Return the collection name associated with a collection primary key
        value.

        Parameters
        ----------
        key
            Collection primary key value.

        Returns
        -------
        name : `str`
            Collection name.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_collection_wildcard(
        self,
        expression: Any,
        *,
        collection_types: Set[CollectionType] = CollectionType.all(),
        done: set[str] | None = None,
        flatten_chains: bool = True,
        include_chains: bool | None = None,
    ) -> list[CollectionRecord]:
        """Return the collection records that match a wildcard expression.

        Parameters
        ----------
        expression
            Names and/or patterns for collections; will be passed to
            `CollectionWildcard.from_expression`.
        collection_types : `collections.abc.Set` [ `CollectionType` ], optional
            If provided, only yield collections of these types.
        done : `set` [ `str` ], optional
            A set of collection names that should be skipped, updated to
            include all processed collection names on return.
        flatten_chains : `bool`, optional
            If `True` (default), recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        include_chains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves. The default is the opposite of
            ``flatten_chains``: either return records for CHAINED collections
            or their children, but not both.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Matching collection records.
        """
        raise NotImplementedError()
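
    # Illustrative usage sketch (not part of the API; the pattern and
    # collection type below are assumptions for illustration only): a regular
    # expression can be resolved to concrete collection records.
    #
    #     records = backend.resolve_collection_wildcard(
    #         re.compile("HSC/runs/.*"),
    #         collection_types={CollectionType.RUN},
    #     )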

    @abstractmethod
    def resolve_dataset_type_wildcard(
        self,
        expression: Any,
        components: bool | None = None,
        missing: list[str] | None = None,
        explicit_only: bool = False,
        components_deprecated: bool = True,
    ) -> dict[DatasetType, list[str | None]]:
        """Return the dataset types that match a wildcard expression.

        Parameters
        ----------
        expression
            Names and/or patterns for dataset types; will be passed to
            `DatasetTypeWildcard.from_expression`.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components. If
            `None` (default), apply patterns to components only if their parent
            datasets were not matched by the expression. Fully-specified
            component datasets (`str` or `DatasetType` instances) are always
            included.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to this
            list, if it is provided.
        explicit_only : `bool`, optional
            If `True`, require explicit `DatasetType` instances or `str` names,
            with `re.Pattern` instances deprecated and ``...`` prohibited.
        components_deprecated : `bool`, optional
            If `True`, this is a context in which component dataset support is
            deprecated. This will result in a deprecation warning when
            ``components=True`` or ``components=None`` and a component dataset
            is matched. In the future this will become an error.

        Returns
        -------
        dataset_types : `dict` [ `DatasetType`, `list` [ `str` | `None` ] ]
            A mapping with resolved dataset types as keys and lists of
            matched component names as values, where `None` indicates the
            parent composite dataset type was matched.
        """
        raise NotImplementedError()
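
    # Illustrative sketch (the dataset type and component names are
    # hypothetical): the returned mapping pairs each resolved parent dataset
    # type with its matched components, where `None` stands for the parent
    # itself, e.g. ``{calexp: [None, "psf"]}``.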

    def resolve_single_dataset_type_wildcard(
        self,
        expression: Any,
        components: bool | None = None,
        explicit_only: bool = False,
        components_deprecated: bool = True,
    ) -> tuple[DatasetType, list[str | None]]:
        """Return a single dataset type that matches a wildcard expression.

        Parameters
        ----------
        expression
            Names and/or patterns for the dataset type; will be passed to
            `DatasetTypeWildcard.from_expression`.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components. If
            `None` (default), apply patterns to components only if their parent
            datasets were not matched by the expression. Fully-specified
            component datasets (`str` or `DatasetType` instances) are always
            included.
        explicit_only : `bool`, optional
            If `True`, require explicit `DatasetType` instances or `str` names,
            with `re.Pattern` instances deprecated and ``...`` prohibited.
        components_deprecated : `bool`, optional
            If `True`, this is a context in which component dataset support is
            deprecated. This will result in a deprecation warning when
            ``components=True`` or ``components=None`` and a component dataset
            is matched. In the future this will become an error.

        Returns
        -------
        single_parent : `DatasetType`
            The matched parent dataset type.
        single_components : `list` [ `str` | `None` ]
            The matched components that correspond to this parent, or `None` if
            the parent dataset type itself was matched.

        Notes
        -----
        This method really finds a single parent dataset type and any number of
        components, because it's only the parent dataset type that's known to
        the registry at all; many callers are expected to discard the
        ``single_components`` return value.
        """
        missing: list[str] = []
        matching = self.resolve_dataset_type_wildcard(
            expression,
            components=components,
            missing=missing,
            explicit_only=explicit_only,
            components_deprecated=components_deprecated,
        )
        if not matching:
            if missing:
                raise MissingDatasetTypeError(
                    "\n".join(
                        f"Dataset type {t!r} is not registered, so no instances of it can exist."
                        for t in missing
                    )
                )
            else:
                raise MissingDatasetTypeError(
                    f"No registered dataset types matched expression {expression!r}, "
                    "so no datasets will be found."
                )
        if len(matching) > 1:
            raise DatasetTypeError(
                f"Expression {expression!r} matched multiple parent dataset types: "
                f"{[t.name for t in matching]}, but only one is allowed."
            )
        ((single_parent, single_components),) = matching.items()
        if missing:
            raise DatasetTypeError(
                f"Expression {expression!r} appears to involve multiple dataset types, even though only "
                f"one ({single_parent.name}) is registered, and only one is allowed here."
            )
        return single_parent, single_components
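
    # Illustrative usage sketch (the dataset type name is hypothetical): a
    # plain string naming exactly one registered dataset type resolves to
    # that parent plus a component list that most callers discard.
    #
    #     parent, _ = backend.resolve_single_dataset_type_wildcard("raw")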

    @abstractmethod
    def filter_dataset_collections(
        self,
        dataset_types: Iterable[DatasetType],
        collections: Sequence[CollectionRecord],
        *,
        governor_constraints: Mapping[str, Set[str]],
        rejections: list[str] | None = None,
    ) -> dict[DatasetType, list[CollectionRecord]]:
        """Filter a sequence of collections to those for which a dataset query
        might succeed.

        Parameters
        ----------
        dataset_types : `Iterable` [ `DatasetType` ]
            Dataset types that are being queried. Must include only parent
            or standalone dataset types, not components.
        collections : `Sequence` [ `CollectionRecord` ]
            Sequence of collections that will be searched.
        governor_constraints : `Mapping` [ `str`, `~collections.abc.Set` ], \
            optional
            Constraints imposed by other aspects of the query on governor
            dimensions; collections inconsistent with these constraints will be
            skipped.
        rejections : `list` [ `str` ], optional
            If not `None`, a `list` that diagnostic messages will be appended
            to, for any collection that matches ``collections`` but is not
            returned. At least one message is guaranteed whenever the result
            is empty.

        Returns
        -------
        dataset_collections : `dict` [ `DatasetType`, \
            `list` [ `CollectionRecord` ] ]
            The collections to search for each dataset. The dictionary's keys
            are always exactly ``dataset_types`` (in the same order), and each
            nested `list` of collections is ordered consistently with the
            given ``collections``.

        Notes
        -----
        This method accepts multiple dataset types and multiple collections at
        once to enable implementations to batch up the fetching of summary
        information needed to relate them.
        """
        raise NotImplementedError()
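
    # Illustrative sketch (names are hypothetical): the result maps every
    # queried dataset type to the subset of ``collections`` whose summaries
    # say the type may be present, e.g. ``{raw: [run_record], calexp: []}``,
    # with ``rejections`` explaining why a collection was dropped.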

    def resolve_dataset_collections(
        self,
        dataset_type: DatasetType,
        collections: CollectionWildcard,
        *,
        governor_constraints: Mapping[str, Set[str]],
        rejections: list[str] | None = None,
        collection_types: Set[CollectionType] = CollectionType.all(),
        allow_calibration_collections: bool = False,
    ) -> list[CollectionRecord]:
        """Resolve the sequence of collections to query for a dataset type.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type to be queried in the returned collections.
        collections : `CollectionWildcard`
            Expression for the collections to be queried.
        governor_constraints : `Mapping` [ `str`, `~collections.abc.Set` ], \
            optional
            Constraints imposed by other aspects of the query on governor
            dimensions; collections inconsistent with these constraints will be
            skipped.
        rejections : `list` [ `str` ], optional
            If not `None`, a `list` that diagnostic messages will be appended
            to, for any collection that matches ``collections`` but is not
            returned. At least one message is guaranteed whenever the result
            is empty.
        collection_types : `~collections.abc.Set` [ `CollectionType` ], \
            optional
            Collection types to consider when resolving the collection
            expression.
        allow_calibration_collections : `bool`, optional
            If `False`, skip (with a ``rejections`` message) any calibration
            collections that match ``collections`` but are not given explicitly
            by name, and raise `NotImplementedError` for any calibration
            collection that is given explicitly. This is a temporary option
            that will be removed when the query system can handle temporal
            joins involving calibration collections.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            A new list of `CollectionRecord` instances, for collections that
            both match ``collections`` and may have datasets of the given type.

        Notes
        -----
        This is a higher-level driver for `resolve_collection_wildcard` and
        `filter_dataset_collections` that is mostly concerned with handling
        queries against `~CollectionType.CALIBRATION` collections that aren't
        fully supported yet. Once that support improves, this method may be
        removed.
        """
        if collections == CollectionWildcard() and collection_types == CollectionType.all():
            collection_types = {CollectionType.RUN}
        explicit_collections = frozenset(collections.strings)
        matching_collection_records = self.resolve_collection_wildcard(
            collections, collection_types=collection_types
        )
        ((_, filtered_collection_records),) = self.filter_dataset_collections(
            [dataset_type],
            matching_collection_records,
            governor_constraints=governor_constraints,
            rejections=rejections,
        ).items()
        if not allow_calibration_collections:
            supported_collection_records: list[CollectionRecord] = []
            for record in filtered_collection_records:
                if record.type is CollectionType.CALIBRATION:
                    # If the collection name was provided explicitly then
                    # raise, since this is a kind of query we don't support
                    # yet; otherwise the collection is part of a chained
                    # collection or a regex match, and we skip it so as not to
                    # break queries of other included collections.
                    if record.name in explicit_collections:
                        raise NotImplementedError(
                            f"Query for dataset type {dataset_type.name!r} in CALIBRATION-type "
                            f"collection {record.name!r} is not yet supported."
                        )
                    else:
                        if rejections is not None:
                            rejections.append(
                                f"Not searching for dataset {dataset_type.name!r} in CALIBRATION "
                                f"collection {record.name!r} because calibration queries aren't fully "
                                "implemented; this is not an error only because the query structure "
                                "implies that searching this collection may be incidental."
                            )
                            supported_collection_records.append(record)
                else:
                    supported_collection_records.append(record)
        else:
            supported_collection_records = filtered_collection_records
        if not supported_collection_records and rejections is not None and not rejections:
            rejections.append(f"No collections to search matching expression {collections!r}.")
        return supported_collection_records
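
    # Illustrative usage sketch (collection and constraint values are
    # assumptions for illustration): resolve a wildcard down to the
    # collections worth searching for one dataset type, collecting
    # diagnostics for anything skipped.
    #
    #     messages: list[str] = []
    #     records = backend.resolve_dataset_collections(
    #         dataset_type,
    #         CollectionWildcard.from_expression(["HSC/defaults"]),
    #         governor_constraints={"instrument": {"HSC"}},
    #         rejections=messages,
    #     )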

    @abstractmethod
    def make_dataset_query_relation(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
    ) -> Relation:
        """Construct a relation that represents an unordered query for datasets
        that returns matching results from all given collections.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being queried.
        collections : `Sequence` [ `CollectionRecord` ]
            Records for collections to query. Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        context : `QueryContext`
            Context that manages per-query state.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the relation. See `Query.find_datasets` for
            details.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a dataset query.
        """
        raise NotImplementedError()

    def make_dataset_search_relation(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
        *,
        join_to: Relation | None = None,
    ) -> Relation:
        """Construct a relation that represents an ordered query for datasets
        that returns results from the first matching collection for each
        data ID.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being searched.
        collections : `Sequence` [ `CollectionRecord` ]
            Records for collections to search. Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the relation. See
            `make_dataset_query_relation` for options.
        context : `QueryContext`
            Context that manages per-query state.
        join_to : `Relation`, optional
            Another relation to join with the query for datasets in all
            collections before filtering out shadowed datasets.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a find-first dataset search.
        """
        base = self.make_dataset_query_relation(
            dataset_type,
            collections,
            columns | {"rank"},
            context=context,
        )
        if join_to is not None:
            base = join_to.join(base)
        # Query-simplification shortcut: if there is only one collection, a
        # find-first search is just a regular result subquery. Same if there
        # are no collections.
        if len(collections) <= 1:
            return base
        # We filter the dimension keys in the given relation through
        # DimensionGraph.required.names to minimize the set we partition on
        # and order it in a more index-friendly way. More precisely, any
        # index we define on dimensions will be consistent with this order, but
        # any particular index may not have the same dimension columns.
        dimensions = self.universe.extract(
            [tag.dimension for tag in DimensionKeyColumnTag.filter_from(base.columns)]
        )
        find_first = FindFirstDataset(
            dimensions=DimensionKeyColumnTag.generate(dimensions.required.names),
            rank=DatasetColumnTag(dataset_type.name, "rank"),
        )
        return find_first.apply(
            base, preferred_engine=context.preferred_engine, require_preferred_engine=True
        ).with_only_columns(base.columns - {find_first.rank})
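
    # Illustrative end-to-end sketch (the collection expression and column
    # names are assumptions for illustration): build a find-first search
    # relation using the methods above.
    #
    #     with backend.context() as context:
    #         records = backend.resolve_dataset_collections(
    #             dataset_type, collections, governor_constraints={}
    #         )
    #         relation = backend.make_dataset_search_relation(
    #             dataset_type, records, {"dataset_id", "run"}, context
    #         )
    #
    # The resulting relation can then be executed or composed further by the
    # `~lsst.daf.relation` machinery managed by ``context``.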

    def make_doomed_dataset_relation(
        self,
        dataset_type: DatasetType,
        columns: Set[str],
        messages: Iterable[str],
        context: _C,
    ) -> Relation:
        """Construct a relation that represents a doomed query for datasets.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type being queried.
        columns : `~collections.abc.Set` [ `str` ]
            Dataset columns to include (dimension key columns are always
            included). See `make_dataset_query_relation` for allowed values.
        messages : `Iterable` [ `str` ]
            Diagnostic messages that explain why the query is doomed to yield
            no rows.
        context : `QueryContext`
            Context that manages per-query state.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation with the requested columns and no rows.
        """
        column_tags: set[ColumnTag] = set(
            DimensionKeyColumnTag.generate(dataset_type.dimensions.required.names)
        )
        column_tags.update(DatasetColumnTag.generate(dataset_type.name, columns))
        return context.preferred_engine.make_doomed_relation(columns=column_tags, messages=list(messages))

    @abstractmethod
    def make_dimension_relation(
        self,
        dimensions: DimensionGraph,
        columns: Set[ColumnTag],
        context: _C,
        *,
        initial_relation: Relation | None = None,
        initial_join_max_columns: frozenset[ColumnTag] | None = None,
        initial_dimension_relationships: Set[frozenset[str]] | None = None,
        spatial_joins: Iterable[tuple[str, str]] = (),
        governor_constraints: Mapping[str, Set[str]],
    ) -> Relation:
        """Construct a relation that provides columns and constraints from
        dimension records.

        Parameters
        ----------
        dimensions : `DimensionGraph`
            Dimensions to include. The key columns for all dimensions (both
            required and implied) will be included in the returned relation.
        columns : `~collections.abc.Set` [ `ColumnTag` ]
            Dimension record columns to include. This set may include key
            column tags as well, though these may be ignored; the set of key
            columns to include is determined by the ``dimensions`` argument
            instead.
        context : `QueryContext`
            Context that manages per-query state.
        initial_relation : `~lsst.daf.relation.Relation`, optional
            Initial relation to join to the dimension relations. If this
            relation provides record columns, key columns, and relationships
            between key columns (see ``initial_dimension_relationships`` below)
            that would otherwise have been added by joining in a dimension
            element's relation, that relation may not be joined in at all.
        initial_join_max_columns : `frozenset` [ `ColumnTag` ], optional
            Maximum superset of common columns for joins to
            ``initial_relation`` (i.e. columns in the ``ON`` expression of SQL
            ``JOIN`` clauses). If provided, this is a subset of the dimension
            key columns in ``initial_relation``, which are otherwise all
            considered as potential common columns for joins. Ignored if
            ``initial_relation`` is not provided.
        initial_dimension_relationships : `~collections.abc.Set` \
            [ `frozenset` [ `str` ] ], optional
            A set of sets of dimension names representing relationships between
            dimensions encoded in the rows of ``initial_relation``. If not
            provided (and ``initial_relation`` is),
            `extract_dimension_relationships` will be called on
            ``initial_relation``.
        spatial_joins : `collections.abc.Iterable` [ `tuple` [ `str`, `str` ] ]
            Iterable of dimension element name pairs that should be spatially
            joined.
        governor_constraints : `Mapping` [ `str`, `~collections.abc.Set` \
            [ `str` ] ], optional
            Constraints on governor dimensions that are provided by other parts
            of the query that either have been included in ``initial_relation``
            or are guaranteed to be added in the future. This is a mapping from
            governor dimension name to sets of values that dimension may take.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation containing the given dimension columns and constraints.
        """
        raise NotImplementedError()
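
    # Illustrative sketch (the dimension element names and constraint values
    # are assumptions for illustration): a dimension-only relation with a
    # spatial join between two elements.
    #
    #     relation = backend.make_dimension_relation(
    #         dimensions,
    #         columns=set(),
    #         context=context,
    #         spatial_joins=[("visit_detector_region", "patch")],
    #         governor_constraints={"instrument": {"HSC"}, "skymap": {"hsc_rings_v1"}},
    #     )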

    @abstractmethod
    def resolve_governor_constraints(
        self, dimensions: DimensionGraph, constraints: Mapping[str, Set[str]], context: _C
    ) -> Mapping[str, Set[str]]:
        """Resolve governor dimension constraints provided by user input to
        a query against the content in the `Registry`.

        Parameters
        ----------
        dimensions : `DimensionGraph`
            Dimensions that bound the governor dimensions to consider (via
            ``dimensions.governors``, more specifically).
        constraints : `Mapping` [ `str`, `~collections.abc.Set` [ `str` ] ]
            Constraints from user input to the query (e.g. from data IDs and
            string expression predicates).
        context : `QueryContext`
            Object that manages state for the query; used here to fetch the
            governor dimension record cache if it has not already been loaded.

        Returns
        -------
        resolved : `Mapping` [ `str`, `~collections.abc.Set` [ `str` ] ]
            A shallow copy of ``constraints`` with keys equal to
            ``dimensions.governors.names`` and value sets constrained by the
            Registry content if they were not already in ``constraints``.

        Raises
        ------
        DataIdValueError
            Raised if ``constraints`` includes governor dimension values that
            are not present in the `Registry`.
        """
        raise NotImplementedError()
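
    # Illustrative sketch (constraint values are assumptions for
    # illustration): user-provided governor constraints are validated and
    # filled in from registry content, so the result has an entry for every
    # governor dimension relevant to ``dimensions``.
    #
    #     resolved = backend.resolve_governor_constraints(
    #         dimensions, {"instrument": {"HSC"}}, context
    #     )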

    @abstractmethod
    def get_dimension_record_cache(
        self, element_name: str, context: _C
    ) -> Mapping[DataCoordinate, DimensionRecord] | None:
        """Return a local cache of all `DimensionRecord` objects for a
        dimension element, fetching it if necessary.

        Parameters
        ----------
        element_name : `str`
            Name of the dimension element.
        context : `.queries.SqlQueryContext`
            Context to be used to execute queries when no cached result is
            available.

        Returns
        -------
        cache : `Mapping` [ `DataCoordinate`, `DimensionRecord` ] or `None`
            Mapping from data ID to dimension record, or `None` if this
            element's records are never cached.
        """
        raise NotImplementedError()

    def extract_dimension_relationships(self, relation: Relation) -> set[frozenset[str]]:
        """Extract the dimension key relationships encoded in a relation tree.

        Parameters
        ----------
        relation : `Relation`
            Relation tree to process.

        Returns
        -------
        relationships : `set` [ `frozenset` [ `str` ] ]
            Set of sets of dimension names, where each inner set represents a
            relationship between dimensions.

        Notes
        -----
        Dimension relationships include both many-to-one implied dependencies
        and many-to-many joins backed by "always-join" dimension elements, and
        it's important to join in the dimension table that defines a
        relationship in any query involving dimensions that are a superset of
        that relationship. For example, let's consider a relation tree that
        joins dataset existence-check relations for two dataset types, with
        dimensions ``{instrument, exposure, detector}`` and ``{instrument,
        physical_filter}``. The joined relation appears to have all dimension
        keys in its expanded graph present except ``band``, and the system
        could easily correct this by joining that dimension in directly. But
        it's also missing the ``{instrument, exposure, physical_filter}``
        relationship we'd get from the ``exposure`` dimension's own relation
        (``exposure`` implies ``physical_filter``) and the similar
        ``{instrument, physical_filter, band}`` relationship from the
        ``physical_filter`` dimension relation; we need the relationship logic
        to recognize that those dimensions need to be joined in as well in
        order for the full relation to have rows that represent valid data IDs.

        The implementation of this method relies on the assumption that
        `LeafRelation` objects always have rows that are consistent with all
        defined relationships (i.e. are valid data IDs). This is true not just
        for dimension relations themselves, but for anything created from
        queries based on them, including datasets and query results. It is
        possible to construct `LeafRelation` objects that don't satisfy this
        criterion (e.g. when accepting user-provided data IDs), and in this
        case higher-level guards or warnings must be provided.
        """
        return {
            frozenset(
                tag.dimension
                for tag in DimensionKeyColumnTag.filter_from(leaf_relation.columns & relation.columns)
            )
            for leaf_relation in self._extract_leaf_relations(relation).values()
        }
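
    # Illustrative sketch tied to the example in the Notes above: a relation
    # tree whose leaves have dimension key columns {instrument, exposure,
    # detector} and {instrument, physical_filter} would yield
    #
    #     {
    #         frozenset({"instrument", "exposure", "detector"}),
    #         frozenset({"instrument", "physical_filter"}),
    #     }
    #
    # and the absence of the {instrument, exposure, physical_filter} and
    # {instrument, physical_filter, band} relationships tells the caller that
    # the corresponding dimension relations still need to be joined in.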

    def _extract_leaf_relations(self, relation: Relation) -> dict[str, LeafRelation]:
        """Recursively extract leaf relations from a relation tree.

        Parameters
        ----------
        relation : `Relation`
            Tree to process.

        Returns
        -------
        leaves : `dict` [ `str`, `LeafRelation` ]
            Leaf relations, keyed and deduplicated by name.
        """
        match relation:
            case LeafRelation() as leaf:
                return {leaf.name: leaf}
            case UnaryOperationRelation(target=target):
                return self._extract_leaf_relations(target)
            case BinaryOperationRelation(lhs=lhs, rhs=rhs):
                return self._extract_leaf_relations(lhs) | self._extract_leaf_relations(rhs)
            case MarkerRelation(target=target):
                return self._extract_leaf_relations(target)
        raise AssertionError("Match should be exhaustive and all branches should return.")